Adding clang-format, and reformatting all source code

This commit is contained in:
Ian Roddis
2021-09-21 09:41:11 -03:00
parent 39d5ae08be
commit 288ce28d29
36 changed files with 3355 additions and 2802 deletions

198
.clang-format Normal file
View File

@@ -0,0 +1,198 @@
---
DisableFormat: false
Language: Cpp
Standard: Auto
# Indentation rules
IndentWidth: 2
AccessModifierOffset: -2
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 4
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWrappedFunctionNames: false
NamespaceIndentation: All
UseTab: Never
TabWidth: 8
# Brace wrapping rules
BreakBeforeBraces: Custom
BraceWrapping:
AfterEnum: true
AfterClass: true
AfterStruct: true
AfterUnion: true
AfterNamespace: false
AfterExternBlock: false
AfterCaseLabel: false
AfterControlStatement: false
AfterFunction: true
BeforeCatch: true
BeforeElse: true
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
# Line break rules
DeriveLineEnding: true
UseCRLF: false
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 1
BinPackArguments: true
BinPackParameters: true
ExperimentalAutoDetectBinPacking: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BreakInheritanceList: BeforeComma
BreakConstructorInitializers: BeforeComma
BreakBeforeInheritanceComma: true
BreakConstructorInitializersBeforeComma: true
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakStringLiterals: true
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowAllConstructorInitializersOnNextLine: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: false
# Line length rules
ColumnLimit: 80
ReflowComments: true
## line length penalties
## these determine where line breaks are inserted when over ColumnLimit
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
# Alignment rules
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
DerivePointerAlignment: true
PointerAlignment: Left
# Include ordering rules
IncludeBlocks: Regroup
SortIncludes: true
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IncludeCategories:
- Regex: '^".*\.h"'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
# Namespace rules
CompactNamespaces: true
FixNamespaceComments: true
# Language extension macros
CommentPragmas: '^ IWYU pragma:'
MacroBlockBegin: ''
MacroBlockEnd: ''
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
# Spacing rules
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInParentheses: false
SpacesInSquareBrackets: false
# Rules for detecting embedded code blocks
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
# C++ specific rules
Cpp11BracedListStyle: true
SortUsingDeclarations: true
...

View File

@@ -1,15 +1,15 @@
#pragma once #pragma once
#include <iostream>
#include <deque> #include <deque>
#include <functional>
#include <iostream>
#include <iterator>
#include <optional>
#include <queue>
#include <sstream>
#include <stdexcept> #include <stdexcept>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <iterator>
#include <functional>
#include <optional>
#include <sstream>
#include <queue>
#include "Defines.hpp" #include "Defines.hpp"
@@ -20,18 +20,20 @@
namespace daggy { namespace daggy {
template<typename K, typename V> template <typename K, typename V>
struct Vertex { struct Vertex
{
RunState state; RunState state;
uint32_t depCount; uint32_t depCount;
V data; V data;
std::unordered_set<K> children; std::unordered_set<K> children;
}; };
template <typename K, typename V>
template<typename K, typename V> class DAG
class DAG { {
using Edge = std::pair<K, K>; using Edge = std::pair<K, K>;
public: public:
// Vertices // Vertices
void addVertex(K id, V data); void addVertex(K id, V data);
@@ -41,7 +43,8 @@ namespace daggy {
// Edges // Edges
void addEdge(const K &src, const K &dst); void addEdge(const K &src, const K &dst);
void addEdgeIf(const K &src, std::function<bool(const Vertex<K, V> &v)> predicate); void addEdgeIf(const K &src,
std::function<bool(const Vertex<K, V> &v)> predicate);
bool isValid() const; bool isValid() const;
@@ -64,7 +67,8 @@ namespace daggy {
void setVertexState(const K &id, RunState state); void setVertexState(const K &id, RunState state);
void forEach(std::function<void(const std::pair<K, Vertex<K, V>> &)> fun) const; void forEach(
std::function<void(const std::pair<K, Vertex<K, V>> &)> fun) const;
bool allVisited() const; bool allVisited() const;
@@ -77,6 +81,6 @@ namespace daggy {
private: private:
std::unordered_map<K, Vertex<K, V>> vertices_; std::unordered_map<K, Vertex<K, V>> vertices_;
}; };
} } // namespace daggy
#include "DAG.impl.hxx" #include "DAG.impl.hxx"

View File

@@ -1,18 +1,31 @@
namespace daggy { namespace daggy {
template<typename K, typename V> template <typename K, typename V>
size_t DAG<K, V>::size() const { return vertices_.size(); } size_t DAG<K, V>::size() const
{
return vertices_.size();
}
template<typename K, typename V> template <typename K, typename V>
bool DAG<K, V>::empty() const { return vertices_.empty(); } bool DAG<K, V>::empty() const
{
return vertices_.empty();
}
template<typename K, typename V> template <typename K, typename V>
bool DAG<K, V>::hasVertex(const K &id) { return vertices_.count(id) != 0; } bool DAG<K, V>::hasVertex(const K &id)
{
return vertices_.count(id) != 0;
}
template<typename K, typename V> template <typename K, typename V>
Vertex <K, V> &DAG<K, V>::getVertex(const K &id) { return vertices_.at(id); } Vertex<K, V> &DAG<K, V>::getVertex(const K &id)
{
return vertices_.at(id);
}
template<typename K, typename V> template <typename K, typename V>
std::unordered_set<K> DAG<K, V>::getVertices() const { std::unordered_set<K> DAG<K, V>::getVertices() const
{
std::unordered_set<K> keys; std::unordered_set<K> keys;
for (const auto it : vertices_) { for (const auto it : vertices_) {
keys.insert(it.first); keys.insert(it.first);
@@ -20,51 +33,64 @@ namespace daggy {
return keys; return keys;
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::addVertex(K id, V data) { void DAG<K, V>::addVertex(K id, V data)
{
if (vertices_.count(id) != 0) { if (vertices_.count(id) != 0) {
std::stringstream ss; std::stringstream ss;
ss << "A vertex with ID " << id << " already exists in the DAG"; ss << "A vertex with ID " << id << " already exists in the DAG";
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
} }
vertices_.emplace(id, Vertex<K, V>{.state = RunState::QUEUED, .depCount = 0, .data = data}); vertices_.emplace(
id,
Vertex<K, V>{.state = RunState::QUEUED, .depCount = 0, .data = data});
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::addEdge(const K &from, const K &to) { void DAG<K, V>::addEdge(const K &from, const K &to)
if (vertices_.find(from) == vertices_.end()) throw std::runtime_error("No such vertex"); {
if (vertices_.find(to) == vertices_.end()) throw std::runtime_error("No such vertex"); if (vertices_.find(from) == vertices_.end())
throw std::runtime_error("No such vertex");
if (vertices_.find(to) == vertices_.end())
throw std::runtime_error("No such vertex");
vertices_.at(from).children.insert(to); vertices_.at(from).children.insert(to);
vertices_.at(to).depCount++; vertices_.at(to).depCount++;
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::addEdgeIf(const K &src, std::function<bool(const Vertex <K, V> &v)> predicate) { void DAG<K, V>::addEdgeIf(
auto & parent = vertices_.at(src); const K &src, std::function<bool(const Vertex<K, V> &v)> predicate)
for (auto &[name, vertex]: vertices_) { {
if (! predicate(vertex)) continue; auto &parent = vertices_.at(src);
if (name == src) continue; for (auto &[name, vertex] : vertices_) {
if (!predicate(vertex))
continue;
if (name == src)
continue;
parent.children.insert(name); parent.children.insert(name);
vertex.depCount++; vertex.depCount++;
} }
} }
template<typename K, typename V> template <typename K, typename V>
bool DAG<K, V>::isValid() const { bool DAG<K, V>::isValid() const
{
std::unordered_map<K, size_t> depCounts; std::unordered_map<K, size_t> depCounts;
std::queue<K> ready; std::queue<K> ready;
size_t processed = 0; size_t processed = 0;
for (const auto & [k, v] : vertices_) { for (const auto &[k, v] : vertices_) {
depCounts[k] = v.depCount; depCounts[k] = v.depCount;
if (v.depCount == 0) ready.push(k); if (v.depCount == 0)
ready.push(k);
} }
while (! ready.empty()) { while (!ready.empty()) {
const auto & k = ready.front(); const auto &k = ready.front();
for (const auto & child : vertices_.at(k).children) { for (const auto &child : vertices_.at(k).children) {
auto dc = --depCounts[child]; auto dc = --depCounts[child];
if (dc == 0) ready.push(child); if (dc == 0)
ready.push(child);
} }
processed++; processed++;
ready.pop(); ready.pop();
@@ -73,76 +99,85 @@ namespace daggy {
return processed == vertices_.size(); return processed == vertices_.size();
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::reset() { void DAG<K, V>::reset()
{
// Reset the state of all vertices // Reset the state of all vertices
for (auto &[_, v]: vertices_) { for (auto &[_, v] : vertices_) {
v.state = RunState::QUEUED; v.state = RunState::QUEUED;
v.depCount = 0; v.depCount = 0;
} }
// Calculate the upstream count // Calculate the upstream count
for (auto &[_, v]: vertices_) { for (auto &[_, v] : vertices_) {
for (auto c: v.children) { for (auto c : v.children) {
vertices_.at(c).depCount++; vertices_.at(c).depCount++;
} }
} }
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::resetRunning() { void DAG<K, V>::resetRunning()
for (auto &[k, v]: vertices_) { {
if (v.state != +RunState::RUNNING) continue; for (auto &[k, v] : vertices_) {
if (v.state != +RunState::RUNNING)
continue;
v.state = RunState::QUEUED; v.state = RunState::QUEUED;
} }
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::setVertexState(const K &id, RunState state) { void DAG<K, V>::setVertexState(const K &id, RunState state)
{
vertices_.at(id).state = state; vertices_.at(id).state = state;
} }
template<typename K, typename V> template <typename K, typename V>
bool DAG<K, V>::allVisited() const { bool DAG<K, V>::allVisited() const
for (const auto &[_, v]: vertices_) { {
if (v.state != +RunState::COMPLETED) return false; for (const auto &[_, v] : vertices_) {
if (v.state != +RunState::COMPLETED)
return false;
} }
return true; return true;
} }
template<typename K, typename V> template <typename K, typename V>
std::optional<std::pair<K, V>> std::optional<std::pair<K, V>> DAG<K, V>::visitNext()
DAG<K, V>::visitNext() { {
for (auto &[k, v]: vertices_) { for (auto &[k, v] : vertices_) {
if (v.state != +RunState::QUEUED) continue; if (v.state != +RunState::QUEUED)
if (v.depCount != 0) continue; continue;
if (v.depCount != 0)
continue;
v.state = RunState::RUNNING; v.state = RunState::RUNNING;
return std::make_pair(k, v.data); return std::make_pair(k, v.data);
} }
return {}; return {};
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::completeVisit(const K &id) { void DAG<K, V>::completeVisit(const K &id)
{
auto &v = vertices_.at(id); auto &v = vertices_.at(id);
v.state = RunState::COMPLETED; v.state = RunState::COMPLETED;
for (auto c: v.children) { for (auto c : v.children) {
--vertices_.at(c).depCount; --vertices_.at(c).depCount;
} }
} }
template<typename K, typename V> template <typename K, typename V>
void DAG<K, V>::forEach(std::function<void(const std::pair<K, Vertex < K, V>> &) void DAG<K, V>::forEach(std::function<void(const std::pair<K, Vertex<K, V>> &)
> fun) const { >
for ( fun) const
auto it = vertices_.begin(); {
it != vertices_. for (auto it = vertices_.begin(); it != vertices_.
end(); end();
++it) { ++it) {
fun(*it); fun(*it);
} }
} }
} } // namespace daggy

View File

@@ -1,5 +1,7 @@
#pragma once #pragma once
#include <enum.h>
#include <chrono> #include <chrono>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@@ -7,8 +9,6 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <enum.h>
namespace daggy { namespace daggy {
// Commands and parameters // Commands and parameters
using ConfigValue = std::variant<std::string, std::vector<std::string>>; using ConfigValue = std::variant<std::string, std::vector<std::string>>;
@@ -22,38 +22,36 @@ namespace daggy {
// DAG Runs // DAG Runs
using DAGRunID = size_t; using DAGRunID = size_t;
BETTER_ENUM(RunState, uint32_t, BETTER_ENUM(RunState, uint32_t, QUEUED = 1 << 0, RUNNING = 1 << 1,
QUEUED = 1 << 0, RETRY = 1 << 2, ERRORED = 1 << 3, KILLED = 1 << 4,
RUNNING = 1 << 1, COMPLETED = 1 << 5);
RETRY = 1 << 2,
ERRORED = 1 << 3,
KILLED = 1 << 4,
COMPLETED = 1 << 5
);
struct Task { struct Task
{
std::string definedName; std::string definedName;
bool isGenerator; // True if the output of this task is a JSON set of tasks to complete bool isGenerator; // True if the output of this task is a JSON set of tasks
// to complete
uint32_t maxRetries; uint32_t maxRetries;
uint32_t retryIntervalSeconds; // Time to wait between retries uint32_t retryIntervalSeconds; // Time to wait between retries
ConfigValues job; // It's up to the individual inspectors to convert values from strings // array of strings ConfigValues job; // It's up to the individual inspectors to convert values
// from strings // array of strings
std::unordered_set<std::string> children; std::unordered_set<std::string> children;
std::unordered_set<std::string> parents; std::unordered_set<std::string> parents;
bool operator==(const Task &other) const { bool operator==(const Task &other) const
return (definedName == other.definedName) {
and (maxRetries == other.maxRetries) return (definedName == other.definedName) and
and (retryIntervalSeconds == other.retryIntervalSeconds) (maxRetries == other.maxRetries) and
and (job == other.job) (retryIntervalSeconds == other.retryIntervalSeconds) and
and (children == other.children) (job == other.job) and (children == other.children) and
and (parents == other.parents) (parents == other.parents) and (isGenerator == other.isGenerator);
and (isGenerator == other.isGenerator);
} }
}; };
using TaskSet = std::unordered_map<std::string, Task>; using TaskSet = std::unordered_map<std::string, Task>;
struct AttemptRecord { struct AttemptRecord
{
TimePoint startTime; TimePoint startTime;
TimePoint stopTime; TimePoint stopTime;
int rc; // RC from the task int rc; // RC from the task
@@ -61,6 +59,6 @@ namespace daggy {
std::string outputLog; // stdout from command std::string outputLog; // stdout from command
std::string errorLog; // stderr from command std::string errorLog; // stderr from command
}; };
} } // namespace daggy
BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState) BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState)

View File

@@ -1,18 +1,19 @@
#pragma once #pragma once
#include <vector>
#include <string>
#include <variant>
#include <unordered_map>
#include <rapidjson/document.h> #include <rapidjson/document.h>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "Defines.hpp" #include "Defines.hpp"
namespace rj = rapidjson; namespace rj = rapidjson;
namespace daggy { namespace daggy {
void checkRJParse(const rj::ParseResult &result, const std::string &prefix = ""); void checkRJParse(const rj::ParseResult &result,
const std::string &prefix = "");
// Parameters // Parameters
ConfigValues configFromJSON(const std::string &jsonSpec); ConfigValues configFromJSON(const std::string &jsonSpec);
@@ -22,12 +23,14 @@ namespace daggy {
std::string configToJSON(const ConfigValues &config); std::string configToJSON(const ConfigValues &config);
// Tasks // Tasks
Task Task taskFromJSON(const std::string &name, const rj::Value &spec,
taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults = {}); const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults = {}); TaskSet tasksFromJSON(const std::string &jsonSpec,
const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults = {}); TaskSet tasksFromJSON(const rj::Value &spec,
const ConfigValues &jobDefaults = {});
std::string taskToJSON(const Task &task); std::string taskToJSON(const Task &task);
@@ -42,4 +45,4 @@ namespace daggy {
std::string timePointToString(const TimePoint &tp); std::string timePointToString(const TimePoint &tp);
TimePoint stringToTimePoint(const std::string &timeStr); TimePoint stringToTimePoint(const std::string &timeStr);
} } // namespace daggy

View File

@@ -1,26 +1,31 @@
#pragma once #pragma once
#include <filesystem>
#include <pistache/description.h> #include <pistache/description.h>
#include <pistache/endpoint.h> #include <pistache/endpoint.h>
#include <pistache/http.h> #include <pistache/http.h>
#include <filesystem>
#include "ThreadPool.hpp" #include "ThreadPool.hpp"
#include "loggers/dag_run/DAGRunLogger.hpp"
#include "executors/task/TaskExecutor.hpp" #include "executors/task/TaskExecutor.hpp"
#include "loggers/dag_run/DAGRunLogger.hpp"
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace daggy { namespace daggy {
class Server { class Server
{
public: public:
Server(const Pistache::Address &listenSpec, loggers::dag_run::DAGRunLogger &logger, Server(const Pistache::Address &listenSpec,
executors::task::TaskExecutor &executor, loggers::dag_run::DAGRunLogger &logger,
size_t nDAGRunners executors::task::TaskExecutor &executor, size_t nDAGRunners)
) : endpoint_(listenSpec)
: endpoint_(listenSpec), desc_("Daggy API", "0.1"), logger_(logger), executor_(executor), , desc_("Daggy API", "0.1")
runnerPool_(nDAGRunners) {} , logger_(logger)
, executor_(executor)
, runnerPool_(nDAGRunners)
{
}
Server &setWebHandlerThreads(size_t nThreads); Server &setWebHandlerThreads(size_t nThreads);
@@ -37,15 +42,20 @@ namespace daggy {
private: private:
void createDescription(); void createDescription();
void handleRunDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void handleRunDAG(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void handleGetDAGRuns(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void handleGetDAGRun(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
bool handleAuth(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter &response); bool handleAuth(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter &response);
Pistache::Http::Endpoint endpoint_; Pistache::Http::Endpoint endpoint_;
Pistache::Rest::Description desc_; Pistache::Rest::Description desc_;
@@ -55,4 +65,4 @@ namespace daggy {
executors::task::TaskExecutor &executor_; executors::task::TaskExecutor &executor_;
ThreadPool runnerPool_; ThreadPool runnerPool_;
}; };
} } // namespace daggy

View File

@@ -1,14 +1,14 @@
#pragma once #pragma once
#include <atomic> #include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <list>
#include <memory>
#include <queue>
#include <thread> #include <thread>
#include <vector> #include <vector>
#include <memory>
#include <condition_variable>
#include <future>
#include <queue>
#include <functional>
#include <list>
using namespace std::chrono_literals; using namespace std::chrono_literals;
@@ -21,16 +21,17 @@ namespace daggy {
one producer won't starve out another, but all tasks will be run one producer won't starve out another, but all tasks will be run
as quickly as possible. as quickly as possible.
*/ */
class TaskQueue { class TaskQueue
{
public: public:
template<class F, class... Args> template <class F, class... Args>
decltype(auto) addTask(F &&f, Args &&... args) { decltype(auto) addTask(F &&f, Args &&...args)
{
// using return_type = std::invoke_result<F, Args...>::type; // using return_type = std::invoke_result<F, Args...>::type;
using return_type = std::invoke_result_t<F, Args...>; using return_type = std::invoke_result_t<F, Args...>;
std::packaged_task<return_type()> task( std::packaged_task<return_type()> task(
std::bind(std::forward<F>(f), std::forward<Args>(args)...) std::bind(std::forward<F>(f), std::forward<Args>(args)...));
);
std::future<return_type> res = task.get_future(); std::future<return_type> res = task.get_future();
{ {
@@ -40,39 +41,49 @@ namespace daggy {
return res; return res;
} }
std::packaged_task<void()> pop() { std::packaged_task<void()> pop()
{
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
auto task = std::move(tasks_.front()); auto task = std::move(tasks_.front());
tasks_.pop(); tasks_.pop();
return task; return task;
} }
size_t size() { size_t size()
{
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
return tasks_.size(); return tasks_.size();
} }
bool empty() { bool empty()
{
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
return tasks_.empty(); return tasks_.empty();
} }
private: private:
std::queue<std::packaged_task<void()> > tasks_; std::queue<std::packaged_task<void()>> tasks_;
std::mutex mtx_; std::mutex mtx_;
}; };
class ThreadPool { class ThreadPool
{
public: public:
explicit ThreadPool(size_t nWorkers) explicit ThreadPool(size_t nWorkers)
: : tqit_(taskQueues_.begin())
tqit_(taskQueues_.begin()), stop_(false), drain_(false) { , stop_(false)
, drain_(false)
{
resize(nWorkers); resize(nWorkers);
} }
~ThreadPool() { shutdown(); } ~ThreadPool()
{
shutdown();
}
void shutdown() { void shutdown()
{
stop_ = true; stop_ = true;
cv_.notify_all(); cv_.notify_all();
for (std::thread &worker : workers_) { for (std::thread &worker : workers_) {
@@ -81,22 +92,26 @@ namespace daggy {
} }
} }
void drain() { void drain()
{
drain_ = true; drain_ = true;
while (true) { while (true) {
{ {
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
if (taskQueues_.empty()) break; if (taskQueues_.empty())
break;
} }
std::this_thread::sleep_for(250ms); std::this_thread::sleep_for(250ms);
} }
} }
void restart() { void restart()
{
drain_ = false; drain_ = false;
} }
void resize(size_t nWorkers) { void resize(size_t nWorkers)
{
shutdown(); shutdown();
workers_.clear(); workers_.clear();
stop_ = false; stop_ = false;
@@ -109,26 +124,30 @@ namespace daggy {
std::unique_lock<std::mutex> lock(mtx_); std::unique_lock<std::mutex> lock(mtx_);
cv_.wait(lock, [&] { return stop_ || !taskQueues_.empty(); }); cv_.wait(lock, [&] { return stop_ || !taskQueues_.empty(); });
if (taskQueues_.empty()) { if (taskQueues_.empty()) {
if (stop_) return; if (stop_)
return;
continue; continue;
} }
if (tqit_ == taskQueues_.end()) tqit_ = taskQueues_.begin(); if (tqit_ == taskQueues_.end())
tqit_ = taskQueues_.begin();
task = std::move((*tqit_)->pop()); task = std::move((*tqit_)->pop());
if ((*tqit_)->empty()) { if ((*tqit_)->empty()) {
tqit_ = taskQueues_.erase(tqit_); tqit_ = taskQueues_.erase(tqit_);
} else { }
else {
tqit_++; tqit_++;
} }
} }
task(); task();
} }
} });
);
}; };
template<class F, class... Args> template <class F, class... Args>
decltype(auto) addTask(F &&f, Args &&... args) { decltype(auto) addTask(F &&f, Args &&...args)
if (drain_) throw std::runtime_error("Unable to add task to draining pool"); {
if (drain_)
throw std::runtime_error("Unable to add task to draining pool");
auto tq = std::make_shared<TaskQueue>(); auto tq = std::make_shared<TaskQueue>();
auto fut = tq->addTask(f, args...); auto fut = tq->addTask(f, args...);
@@ -141,8 +160,10 @@ namespace daggy {
return fut; return fut;
} }
void addTasks(std::shared_ptr<TaskQueue> tq) { void addTasks(std::shared_ptr<TaskQueue> tq)
if (drain_) throw std::runtime_error("Unable to add task to draining pool"); {
if (drain_)
throw std::runtime_error("Unable to add task to draining pool");
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
taskQueues_.push_back(tq); taskQueues_.push_back(tq);
cv_.notify_one(); cv_.notify_one();
@@ -162,4 +183,4 @@ namespace daggy {
std::atomic<bool> drain_; std::atomic<bool> drain_;
}; };
} } // namespace daggy

View File

@@ -1,41 +1,39 @@
#pragma once #pragma once
#include <vector>
#include <string>
#include <variant>
#include <unordered_map>
#include <rapidjson/document.h> #include <rapidjson/document.h>
#include "daggy/loggers/dag_run/DAGRunLogger.hpp" #include <string>
#include "daggy/executors/task/TaskExecutor.hpp" #include <unordered_map>
#include "Defines.hpp" #include <variant>
#include <vector>
#include "DAG.hpp" #include "DAG.hpp"
#include "Defines.hpp"
#include "daggy/executors/task/TaskExecutor.hpp"
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
namespace daggy { namespace daggy {
using TaskDAG = DAG<std::string, Task>; using TaskDAG = DAG<std::string, Task>;
std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement); std::string globalSub(std::string string, const std::string &pattern,
const std::string &replacement);
std::vector<Command> interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values); std::vector<Command> interpolateValues(const std::vector<std::string> &raw,
const ConfigValues &values);
TaskSet TaskSet expandTaskSet(const TaskSet &tasks,
expandTaskSet(const TaskSet &tasks,
executors::task::TaskExecutor &executor, executors::task::TaskExecutor &executor,
const ConfigValues &interpolatedValues = {}); const ConfigValues &interpolatedValues = {});
TaskDAG buildDAGFromTasks(
TaskDAG TaskSet &tasks,
buildDAGFromTasks(TaskSet &tasks,
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates = {}); const std::vector<loggers::dag_run::TaskUpdateRecord> &updates = {});
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks); void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks);
TaskDAG runDAG(DAGRunID runID, TaskDAG runDAG(DAGRunID runID, executors::task::TaskExecutor &executor,
executors::task::TaskExecutor &executor, loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
loggers::dag_run::DAGRunLogger &logger,
TaskDAG dag,
const ConfigValues job = {}); const ConfigValues job = {});
std::ostream &operator<<(std::ostream &os, const TimePoint &tp); std::ostream &operator<<(std::ostream &os, const TimePoint &tp);
} } // namespace daggy

View File

@@ -1,27 +1,33 @@
#pragma once #pragma once
#include "TaskExecutor.hpp"
#include <daggy/ThreadPool.hpp> #include <daggy/ThreadPool.hpp>
#include "TaskExecutor.hpp"
namespace daggy::executors::task { namespace daggy::executors::task {
class ForkingTaskExecutor : public TaskExecutor { class ForkingTaskExecutor : public TaskExecutor
{
public: public:
using Command = std::vector<std::string>; using Command = std::vector<std::string>;
ForkingTaskExecutor(size_t nThreads) ForkingTaskExecutor(size_t nThreads)
: tp_(nThreads) {} : tp_(nThreads)
{
}
// Validates the job to ensure that all required values are set and are of the right type, // Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override; bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> std::vector<ConfigValues> expandTaskParameters(
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override; const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task // Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override; std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
private: private:
ThreadPool tp_; ThreadPool tp_;
AttemptRecord runTask(const Task & task); AttemptRecord runTask(const Task &task);
}; };
} } // namespace daggy::executors::task

View File

@@ -3,18 +3,20 @@
#include "TaskExecutor.hpp" #include "TaskExecutor.hpp"
namespace daggy::executors::task { namespace daggy::executors::task {
class NoopTaskExecutor : public TaskExecutor { class NoopTaskExecutor : public TaskExecutor
{
public: public:
using Command = std::vector<std::string>; using Command = std::vector<std::string>;
// Validates the job to ensure that all required values are set and are of the right type, // Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override; bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> std::vector<ConfigValues> expandTaskParameters(
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override; const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task // Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override; std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
}; };
} } // namespace daggy::executors::task

View File

@@ -3,24 +3,28 @@
#include "TaskExecutor.hpp" #include "TaskExecutor.hpp"
namespace daggy::executors::task { namespace daggy::executors::task {
class SlurmTaskExecutor : public TaskExecutor { class SlurmTaskExecutor : public TaskExecutor
{
public: public:
using Command = std::vector<std::string>; using Command = std::vector<std::string>;
SlurmTaskExecutor(); SlurmTaskExecutor();
~SlurmTaskExecutor(); ~SlurmTaskExecutor();
// Validates the job to ensure that all required values are set and are of the right type, // Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override; bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> std::vector<ConfigValues> expandTaskParameters(
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override; const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task // Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override; std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
private: private:
struct Job { struct Job
{
std::promise<AttemptRecord> prom; std::promise<AttemptRecord> prom;
std::string stdoutFile; std::string stdoutFile;
std::string stderrFile; std::string stderrFile;
@@ -34,4 +38,4 @@ namespace daggy::executors::task {
std::thread monitorWorker_; std::thread monitorWorker_;
void monitor(); void monitor();
}; };
} } // namespace daggy::executors::task

View File

@@ -1,31 +1,33 @@
#pragma once #pragma once
#include <chrono> #include <chrono>
#include <daggy/Defines.hpp>
#include <future> #include <future>
#include <string> #include <string>
#include <thread> #include <thread>
#include <vector> #include <vector>
#include <daggy/Defines.hpp>
/* /*
Executors run Tasks, returning a future with the results. Executors run Tasks, returning a future with the results.
If there are many retries, logs are returned for each attempt. If there are many retries, logs are returned for each attempt.
*/ */
namespace daggy::executors::task { namespace daggy::executors::task {
class TaskExecutor { class TaskExecutor
{
public: public:
virtual ~TaskExecutor() = default; virtual ~TaskExecutor() = default;
// Validates the job to ensure that all required values are set and are of the right type, // Validates the job to ensure that all required values are set and are of
// the right type,
virtual bool validateTaskParameters(const ConfigValues &job) = 0; virtual bool validateTaskParameters(const ConfigValues &job) = 0;
// Will use the expansion values to return the fully expanded tasks. // Will use the expansion values to return the fully expanded tasks.
virtual std::vector<ConfigValues> virtual std::vector<ConfigValues> expandTaskParameters(
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) = 0; const ConfigValues &job, const ConfigValues &expansionValues) = 0;
// Blocking execution of a task // Blocking execution of a task
virtual std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) = 0; virtual std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) = 0;
}; };
} } // namespace daggy::executors::task

View File

@@ -11,32 +11,32 @@
be supported. be supported.
*/ */
namespace daggy { namespace daggy { namespace loggers { namespace dag_run {
namespace loggers { class DAGRunLogger
namespace dag_run { {
class DAGRunLogger {
public: public:
virtual ~DAGRunLogger() = default; virtual ~DAGRunLogger() = default;
// Execution // Execution
virtual DAGRunID startDAGRun(std::string name, const TaskSet &tasks) = 0; virtual DAGRunID startDAGRun(std::string name, const TaskSet &tasks) = 0;
virtual void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0; virtual void addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) = 0;
virtual void updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0; virtual void updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) = 0;
virtual void updateDAGRunState(DAGRunID dagRunID, RunState state) = 0; virtual void updateDAGRunState(DAGRunID dagRunID, RunState state) = 0;
virtual void virtual void logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
logTaskAttempt(DAGRunID dagRunID, const std::string &taskName, const AttemptRecord &attempt) = 0; const AttemptRecord &attempt) = 0;
virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) = 0; virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) = 0;
// Querying // Querying
virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) = 0; virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) = 0;
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0; virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
}; };
} }}} // namespace daggy::loggers::dag_run
}
}

View File

@@ -2,26 +2,29 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include <vector>
#include <unordered_set>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <vector>
#include "../../Defines.hpp" #include "../../Defines.hpp"
namespace daggy::loggers::dag_run { namespace daggy::loggers::dag_run {
struct TaskUpdateRecord { struct TaskUpdateRecord
{
TimePoint time; TimePoint time;
std::string taskName; std::string taskName;
RunState newState; RunState newState;
}; };
struct DAGUpdateRecord { struct DAGUpdateRecord
{
TimePoint time; TimePoint time;
RunState newState; RunState newState;
}; };
// Pretty heavy weight, but // Pretty heavy weight, but
struct DAGRunRecord { struct DAGRunRecord
{
std::string name; std::string name;
TaskSet tasks; TaskSet tasks;
std::unordered_map<std::string, RunState> taskRunStates; std::unordered_map<std::string, RunState> taskRunStates;
@@ -30,7 +33,8 @@ namespace daggy::loggers::dag_run {
std::vector<DAGUpdateRecord> dagStateChanges; std::vector<DAGUpdateRecord> dagStateChanges;
}; };
struct DAGRunSummary { struct DAGRunSummary
{
DAGRunID runID; DAGRunID runID;
std::string name; std::string name;
RunState runState; RunState runState;
@@ -38,4 +42,4 @@ namespace daggy::loggers::dag_run {
TimePoint lastUpdate; TimePoint lastUpdate;
std::unordered_map<RunState, size_t> taskStateCounts; std::unordered_map<RunState, size_t> taskStateCounts;
}; };
} } // namespace daggy::loggers::dag_run

View File

@@ -1,10 +1,11 @@
#pragma once #pragma once
#include <filesystem> #include <rapidjson/document.h>
#include <atomic> #include <atomic>
#include <filesystem>
#include <mutex> #include <mutex>
#include <rapidjson/document.h>
#include "DAGRunLogger.hpp" #include "DAGRunLogger.hpp"
#include "Defines.hpp" #include "Defines.hpp"
@@ -13,8 +14,8 @@ namespace rj = rapidjson;
namespace daggy::loggers::dag_run { namespace daggy::loggers::dag_run {
/* /*
* This logger should only be used for debug purposes. It's not really optimized for querying, and will * This logger should only be used for debug purposes. It's not really
* use a ton of inodes to track state. * optimized for querying, and will use a ton of inodes to track state.
* *
* On the plus side, it's trivial to look at without using the API. * On the plus side, it's trivial to look at without using the API.
* *
@@ -32,7 +33,8 @@ namespace daggy::loggers::dag_run {
* error.log * error.log
* executor.log * executor.log
*/ */
class FileSystemLogger : public DAGRunLogger { class FileSystemLogger : public DAGRunLogger
{
public: public:
FileSystemLogger(fs::path root); FileSystemLogger(fs::path root);
@@ -41,10 +43,11 @@ namespace daggy::loggers::dag_run {
void updateDAGRunState(DAGRunID dagRunID, RunState state) override; void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
void void logTaskAttempt(DAGRunID, const std::string &taskName,
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override; const AttemptRecord &attempt) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override; void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) override;
// Querying // Querying
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override; std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
@@ -64,4 +67,4 @@ namespace daggy::loggers::dag_run {
inline const fs::path getRunRoot(DAGRunID runID) const; inline const fs::path getRunRoot(DAGRunID runID) const;
}; };
} } // namespace daggy::loggers::dag_run

View File

@@ -6,30 +6,32 @@
#include "DAGRunLogger.hpp" #include "DAGRunLogger.hpp"
#include "Defines.hpp" #include "Defines.hpp"
namespace daggy { namespace daggy { namespace loggers { namespace dag_run {
namespace loggers {
namespace dag_run {
/* /*
* This logger should only be used for debug purposes. It doesn't actually log anything, just prints stuff * This logger should only be used for debug purposes. It doesn't actually log
* to stdout. * anything, just prints stuff to stdout.
*/ */
class OStreamLogger : public DAGRunLogger { class OStreamLogger : public DAGRunLogger
{
public: public:
OStreamLogger(std::ostream &os); OStreamLogger(std::ostream &os);
// Execution // Execution
DAGRunID startDAGRun(std::string name, const TaskSet &tasks) override; DAGRunID startDAGRun(std::string name, const TaskSet &tasks) override;
void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) override; void addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) override;
void updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) override; void updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) override;
void updateDAGRunState(DAGRunID dagRunID, RunState state) override; void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
void void logTaskAttempt(DAGRunID, const std::string &taskName,
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override; const AttemptRecord &attempt) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override; void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) override;
// Querying // Querying
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override; std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
@@ -41,10 +43,9 @@ namespace daggy {
std::ostream &os_; std::ostream &os_;
std::vector<DAGRunRecord> dagRuns_; std::vector<DAGRunRecord> dagRuns_;
void _updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state); void _updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state);
void _updateDAGRunState(DAGRunID dagRunID, RunState state); void _updateDAGRunState(DAGRunID dagRunID, RunState state);
}; };
} }}} // namespace daggy::loggers::dag_run
}
}

View File

@@ -1,13 +1,13 @@
#include <sstream>
#include <iomanip>
#include <rapidjson/error/en.h> #include <rapidjson/error/en.h>
#include <daggy/Serialization.hpp> #include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp> #include <daggy/Utilities.hpp>
#include <iomanip>
#include <sstream>
namespace daggy { namespace daggy {
void checkRJParse(const rj::ParseResult &result, const std::string &prefix) { void checkRJParse(const rj::ParseResult &result, const std::string &prefix)
{
if (!result) { if (!result) {
std::stringstream ss; std::stringstream ss;
ss << (prefix.empty() ? "" : prefix + ':') ss << (prefix.empty() ? "" : prefix + ':')
@@ -17,15 +17,19 @@ namespace daggy {
} }
} }
ConfigValues configFromJSON(const std::string &jsonSpec) { ConfigValues configFromJSON(const std::string &jsonSpec)
{
rj::Document doc; rj::Document doc;
checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config"); checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config");
return configFromJSON(doc); return configFromJSON(doc);
} }
ConfigValues configFromJSON(const rj::Value &spec) { ConfigValues configFromJSON(const rj::Value &spec)
{
std::unordered_map<std::string, ConfigValue> parameters; std::unordered_map<std::string, ConfigValue> parameters;
if (!spec.IsObject()) { throw std::runtime_error("Parameters in spec is not a JSON dictionary"); } if (!spec.IsObject()) {
throw std::runtime_error("Parameters in spec is not a JSON dictionary");
}
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) { for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
if (!it->name.IsString()) { if (!it->name.IsString()) {
throw std::runtime_error("All keys must be strings."); throw std::runtime_error("All keys must be strings.");
@@ -36,37 +40,52 @@ namespace daggy {
for (size_t i = 0; i < it->value.Size(); ++i) { for (size_t i = 0; i < it->value.Size(); ++i) {
if (!it->value[i].IsString()) { if (!it->value[i].IsString()) {
throw std::runtime_error( throw std::runtime_error(
"Attribute for " + std::string{it->name.GetString()} + " item " + std::to_string(i) + "Attribute for " + std::string{it->name.GetString()} +
" is not a string."); " item " + std::to_string(i) + " is not a string.");
} }
values.emplace_back(it->value[i].GetString()); values.emplace_back(it->value[i].GetString());
} }
parameters[name] = values; parameters[name] = values;
} else if (it->value.IsString()) { }
else if (it->value.IsString()) {
parameters[name] = it->value.GetString(); parameters[name] = it->value.GetString();
} else { }
throw std::runtime_error( else {
"Attribute for " + std::string{it->name.GetString()} + " is not a string or an array."); throw std::runtime_error("Attribute for " +
std::string{it->name.GetString()} +
" is not a string or an array.");
} }
} }
return parameters; return parameters;
} }
std::string configToJSON(const ConfigValues &config) { std::string configToJSON(const ConfigValues &config)
{
std::stringstream ss; std::stringstream ss;
ss << '{'; ss << '{';
bool first = true; bool first = true;
for (const auto &[k, v]: config) { for (const auto &[k, v] : config) {
if (first) { first = false; } else { ss << ", "; } if (first) {
first = false;
}
else {
ss << ", ";
}
ss << std::quoted(k) << ": "; ss << std::quoted(k) << ": ";
if (std::holds_alternative<std::string>(v)) { if (std::holds_alternative<std::string>(v)) {
ss << std::quoted(std::get<std::string>(v)); ss << std::quoted(std::get<std::string>(v));
} else { }
else {
ss << '['; ss << '[';
const auto &vals = std::get<std::vector<std::string>>(v); const auto &vals = std::get<std::vector<std::string>>(v);
bool firstVal = true; bool firstVal = true;
for (const auto &val: vals) { for (const auto &val : vals) {
if (firstVal) { firstVal = false; } else { ss << ", "; } if (firstVal) {
firstVal = false;
}
else {
ss << ", ";
}
ss << std::quoted(val); ss << std::quoted(val);
} }
ss << ']'; ss << ']';
@@ -76,16 +95,17 @@ namespace daggy {
return ss.str(); return ss.str();
} }
Task Task taskFromJSON(const std::string &name, const rj::Value &spec,
taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults) { const ConfigValues &jobDefaults)
Task task{ {
.definedName = name, Task task{.definedName = name,
.isGenerator = false, .isGenerator = false,
.maxRetries = 0, .maxRetries = 0,
.retryIntervalSeconds = 0, .retryIntervalSeconds = 0,
.job = jobDefaults .job = jobDefaults};
}; if (!spec.IsObject()) {
if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); } throw std::runtime_error("Tasks is not an object");
}
// Grab the standard fields with defaults; // Grab the standard fields with defaults;
if (spec.HasMember("isGenerator")) { if (spec.HasMember("isGenerator")) {
@@ -117,17 +137,21 @@ namespace daggy {
if (spec.HasMember("job")) { if (spec.HasMember("job")) {
const auto &params = spec["job"]; const auto &params = spec["job"];
if (!params.IsObject()) throw std::runtime_error("job is not a dictionary."); if (!params.IsObject())
throw std::runtime_error("job is not a dictionary.");
for (auto it = params.MemberBegin(); it != params.MemberEnd(); ++it) { for (auto it = params.MemberBegin(); it != params.MemberEnd(); ++it) {
if (!it->name.IsString()) throw std::runtime_error("job key must be a string."); if (!it->name.IsString())
throw std::runtime_error("job key must be a string.");
if (it->value.IsArray()) { if (it->value.IsArray()) {
std::vector<std::string> values; std::vector<std::string> values;
for (size_t i = 0; i < it->value.Size(); ++i) { for (size_t i = 0; i < it->value.Size(); ++i) {
values.emplace_back(it->value[i].GetString()); values.emplace_back(it->value[i].GetString());
} }
task.job.insert_or_assign(it->name.GetString(), values); task.job.insert_or_assign(it->name.GetString(), values);
} else { }
task.job.insert_or_assign(it->name.GetString(), it->value.GetString()); else {
task.job.insert_or_assign(it->name.GetString(),
it->value.GetString());
} }
} }
} }
@@ -135,27 +159,34 @@ namespace daggy {
return task; return task;
} }
TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults) { TaskSet tasksFromJSON(const std::string &jsonSpec,
const ConfigValues &jobDefaults)
{
rj::Document doc; rj::Document doc;
checkRJParse(doc.Parse(jsonSpec.c_str())); checkRJParse(doc.Parse(jsonSpec.c_str()));
return tasksFromJSON(doc, jobDefaults); return tasksFromJSON(doc, jobDefaults);
} }
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults) { TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults)
{
TaskSet tasks; TaskSet tasks;
if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); } if (!spec.IsObject()) {
throw std::runtime_error("Tasks is not an object");
}
// Tasks // Tasks
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) { for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
if (!it->name.IsString()) throw std::runtime_error("Task names must be a string."); if (!it->name.IsString())
if (!it->value.IsObject()) throw std::runtime_error("Task definitions must be an object."); throw std::runtime_error("Task names must be a string.");
if (!it->value.IsObject())
throw std::runtime_error("Task definitions must be an object.");
const auto &taskName = it->name.GetString(); const auto &taskName = it->name.GetString();
tasks.emplace(taskName, taskFromJSON(taskName, it->value, jobDefaults)); tasks.emplace(taskName, taskFromJSON(taskName, it->value, jobDefaults));
} }
// Normalize tasks so all the children are populated // Normalize tasks so all the children are populated
for (auto &[k, v] : tasks) { for (auto &[k, v] : tasks) {
for (const auto & p : v.parents) { for (const auto &p : v.parents) {
tasks[p].children.insert(k); tasks[p].children.insert(k);
} }
v.parents.clear(); v.parents.clear();
@@ -164,9 +195,10 @@ namespace daggy {
return tasks; return tasks;
} }
// I really want to do this with rapidjson, but damn they make it ugly and difficult. // I really want to do this with rapidjson, but damn they make it ugly and
// So we'll shortcut and generate the JSON directly. // difficult. So we'll shortcut and generate the JSON directly.
std::string taskToJSON(const Task &task) { std::string taskToJSON(const Task &task)
{
std::stringstream ss; std::stringstream ss;
bool first = false; bool first = false;
@@ -178,8 +210,9 @@ namespace daggy {
ss << R"("children": [)"; ss << R"("children": [)";
first = true; first = true;
for (const auto &child: task.children) { for (const auto &child : task.children) {
if (!first) ss << ','; if (!first)
ss << ',';
ss << std::quoted(child); ss << std::quoted(child);
first = false; first = false;
} }
@@ -187,8 +220,9 @@ namespace daggy {
ss << R"("parents": [)"; ss << R"("parents": [)";
first = true; first = true;
for (const auto &parent: task.parents) { for (const auto &parent : task.parents) {
if (!first) ss << ','; if (!first)
ss << ',';
ss << std::quoted(parent); ss << std::quoted(parent);
first = false; first = false;
} }
@@ -200,14 +234,16 @@ namespace daggy {
return ss.str(); return ss.str();
} }
std::string tasksToJSON(const TaskSet &tasks) { std::string tasksToJSON(const TaskSet &tasks)
{
std::stringstream ss; std::stringstream ss;
ss << "{"; ss << "{";
bool first = true; bool first = true;
for (const auto &[name, task]: tasks) { for (const auto &[name, task] : tasks) {
if (!first) ss << ','; if (!first)
ss << ',';
ss << std::quoted(name) << ": " << taskToJSON(task); ss << std::quoted(name) << ": " << taskToJSON(task);
first = false; first = false;
} }
@@ -216,37 +252,41 @@ namespace daggy {
return ss.str(); return ss.str();
} }
std::ostream &operator<<(std::ostream &os, const Task &task) { std::ostream &operator<<(std::ostream &os, const Task &task)
{
os << taskToJSON(task); os << taskToJSON(task);
return os; return os;
} }
std::string attemptRecordToJSON(const AttemptRecord &record) { std::string attemptRecordToJSON(const AttemptRecord &record)
{
std::stringstream ss; std::stringstream ss;
ss << "{" ss << "{"
<< R"("startTime": )" << std::quoted(timePointToString(record.startTime)) << ',' << R"("startTime": )" << std::quoted(timePointToString(record.startTime))
<< R"("stopTime": )" << std::quoted(timePointToString(record.stopTime)) << ',' << ',' << R"("stopTime": )"
<< R"("rc": )" << std::to_string(record.rc) << ',' << std::quoted(timePointToString(record.stopTime)) << ',' << R"("rc": )"
<< R"("executorLog": )" << std::quoted(record.executorLog) << ',' << std::to_string(record.rc) << ',' << R"("executorLog": )"
<< R"("outputLog": )" << std::quoted(record.outputLog) << ',' << std::quoted(record.executorLog) << ',' << R"("outputLog": )"
<< R"("errorLog": )" << std::quoted(record.errorLog) << std::quoted(record.outputLog) << ',' << R"("errorLog": )"
<< '}'; << std::quoted(record.errorLog) << '}';
return ss.str(); return ss.str();
} }
std::string timePointToString(const TimePoint &tp) { std::string timePointToString(const TimePoint &tp)
{
std::stringstream ss; std::stringstream ss;
ss << tp; ss << tp;
return ss.str(); return ss.str();
} }
TimePoint stringToTimePoint(const std::string &timeString) { TimePoint stringToTimePoint(const std::string &timeString)
{
std::tm dt; std::tm dt;
std::stringstream ss{timeString}; std::stringstream ss{timeString};
ss >> std::get_time(&dt, "%Y-%m-%d %H:%M:%S %Z"); ss >> std::get_time(&dt, "%Y-%m-%d %H:%M:%S %Z");
return Clock::from_time_t(mktime(&dt)); return Clock::from_time_t(mktime(&dt));
} }
} } // namespace daggy

View File

@@ -1,84 +1,82 @@
#include <iomanip>
#include <enum.h> #include <enum.h>
#include <daggy/Server.hpp>
#include <daggy/Serialization.hpp> #include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/Utilities.hpp> #include <daggy/Utilities.hpp>
#include <iomanip>
#define REQ_ERROR(code, msg) response.send(Pistache::Http::Code::code, msg); return; #define REQ_ERROR(code, msg) \
response.send(Pistache::Http::Code::code, msg); \
return;
namespace rj = rapidjson; namespace rj = rapidjson;
using namespace Pistache; using namespace Pistache;
namespace daggy { namespace daggy {
void Server::init(int threads) { void Server::init(int threads)
{
auto opts = Http::Endpoint::options() auto opts = Http::Endpoint::options()
.threads(threads) .threads(threads)
.flags(Pistache::Tcp::Options::ReuseAddr | Pistache::Tcp::Options::ReusePort) .flags(Pistache::Tcp::Options::ReuseAddr |
Pistache::Tcp::Options::ReusePort)
.maxRequestSize(4294967296) .maxRequestSize(4294967296)
.maxResponseSize(4294967296); .maxResponseSize(4294967296);
endpoint_.init(opts); endpoint_.init(opts);
createDescription(); createDescription();
} }
void Server::start() { void Server::start()
{
router_.initFromDescription(desc_); router_.initFromDescription(desc_);
endpoint_.setHandler(router_.handler()); endpoint_.setHandler(router_.handler());
endpoint_.serveThreaded(); endpoint_.serveThreaded();
} }
void Server::shutdown() { void Server::shutdown()
{
endpoint_.shutdown(); endpoint_.shutdown();
} }
uint16_t Server::getPort() const { uint16_t Server::getPort() const
{
return endpoint_.getPort(); return endpoint_.getPort();
} }
void Server::createDescription() { void Server::createDescription()
desc_ {
.info() desc_.info().license("MIT", "https://opensource.org/licenses/MIT");
.license("MIT", "https://opensource.org/licenses/MIT");
auto backendErrorResponse = desc_.response(
Http::Code::Internal_Server_Error, "An error occured with the backend");
auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error, desc_.schemes(Rest::Scheme::Http)
"An error occured with the backend");
desc_
.schemes(Rest::Scheme::Http)
.basePath("/v1") .basePath("/v1")
.produces(MIME(Application, Json)) .produces(MIME(Application, Json))
.consumes(MIME(Application, Json)); .consumes(MIME(Application, Json));
desc_ desc_.route(desc_.get("/ready"))
.route(desc_.get("/ready"))
.bind(&Server::handleReady, this) .bind(&Server::handleReady, this)
.response(Http::Code::Ok, "Response to the /ready call") .response(Http::Code::Ok, "Response to the /ready call")
.hide(); .hide();
auto versionPath = desc_.path("/v1"); auto versionPath = desc_.path("/v1");
auto dagPath = versionPath.path("/dagrun"); auto dagPath = versionPath.path("/dagrun");
// Run a DAG // Run a DAG
dagPath dagPath.route(desc_.post("/"))
.route(desc_.post("/"))
.bind(&Server::handleRunDAG, this) .bind(&Server::handleRunDAG, this)
.produces(MIME(Application, Json), MIME(Application, Xml)) .produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "Run a DAG"); .response(Http::Code::Ok, "Run a DAG");
// List detailed DAG run // List detailed DAG run
dagPath dagPath.route(desc_.get("/:runID"))
.route(desc_.get("/:runID"))
.bind(&Server::handleGetDAGRun, this) .bind(&Server::handleGetDAGRun, this)
.produces(MIME(Application, Json), MIME(Application, Xml)) .produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "Details of a specific DAG run"); .response(Http::Code::Ok, "Details of a specific DAG run");
// List all DAG runs // List all DAG runs
dagPath dagPath.route(desc_.get("/"))
.route(desc_.get("/"))
.bind(&Server::handleGetDAGRuns, this) .bind(&Server::handleGetDAGRuns, this)
.produces(MIME(Application, Json), MIME(Application, Xml)) .produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "The list of all known DAG Runs"); .response(Http::Code::Ok, "The list of all known DAG Runs");
@@ -90,19 +88,29 @@ namespace daggy {
* "job": {...} * "job": {...}
* "tasks": {...} * "tasks": {...}
*/ */
void Server::handleRunDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) { void Server::handleRunDAG(const Pistache::Rest::Request &request,
if (!handleAuth(request, response)) return; Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request, response))
return;
rj::Document doc; rj::Document doc;
try { try {
doc.Parse(request.body().c_str()); doc.Parse(request.body().c_str());
} catch (std::exception &e) { }
catch (std::exception &e) {
REQ_ERROR(Bad_Request, std::string{"Invalid JSON payload: "} + e.what()); REQ_ERROR(Bad_Request, std::string{"Invalid JSON payload: "} + e.what());
} }
if (!doc.IsObject()) { REQ_ERROR(Bad_Request, "Payload is not a dictionary."); } if (!doc.IsObject()) {
if (!doc.HasMember("name")) { REQ_ERROR(Bad_Request, "DAG Run is missing a name."); } REQ_ERROR(Bad_Request, "Payload is not a dictionary.");
if (!doc.HasMember("tasks")) { REQ_ERROR(Bad_Request, "DAG Run has no tasks."); } }
if (!doc.HasMember("name")) {
REQ_ERROR(Bad_Request, "DAG Run is missing a name.");
}
if (!doc.HasMember("tasks")) {
REQ_ERROR(Bad_Request, "DAG Run has no tasks.");
}
std::string runName = doc["name"].GetString(); std::string runName = doc["name"].GetString();
@@ -112,7 +120,8 @@ namespace daggy {
try { try {
auto parsedParams = configFromJSON(doc["parameters"].GetObject()); auto parsedParams = configFromJSON(doc["parameters"].GetObject());
parameters.swap(parsedParams); parameters.swap(parsedParams);
} catch (std::exception &e) { }
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what()); REQ_ERROR(Bad_Request, e.what());
} }
} }
@@ -123,7 +132,8 @@ namespace daggy {
try { try {
auto parsedJobDefaults = configFromJSON(doc["jobDefaults"].GetObject()); auto parsedJobDefaults = configFromJSON(doc["jobDefaults"].GetObject());
jobDefaults.swap(parsedJobDefaults); jobDefaults.swap(parsedJobDefaults);
} catch (std::exception &e) { }
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what()); REQ_ERROR(Bad_Request, e.what());
} }
} }
@@ -134,7 +144,8 @@ namespace daggy {
auto taskTemplates = tasksFromJSON(doc["tasks"], jobDefaults); auto taskTemplates = tasksFromJSON(doc["tasks"], jobDefaults);
auto expandedTasks = expandTaskSet(taskTemplates, executor_, parameters); auto expandedTasks = expandTaskSet(taskTemplates, executor_, parameters);
tasks.swap(expandedTasks); tasks.swap(expandedTasks);
} catch (std::exception &e) { }
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what()); REQ_ERROR(Bad_Request, e.what());
} }
@@ -142,37 +153,45 @@ namespace daggy {
auto runID = logger_.startDAGRun(runName, tasks); auto runID = logger_.startDAGRun(runName, tasks);
auto dag = buildDAGFromTasks(tasks); auto dag = buildDAGFromTasks(tasks);
runnerPool_.addTask( runnerPool_.addTask([this, parameters, runID, dag]() {
[this, parameters, runID, dag]() { runDAG(runID, this->executor_, this->logger_, dag, parameters); }); runDAG(runID, this->executor_, this->logger_, dag, parameters);
});
response.send(Pistache::Http::Code::Ok, R"({"runID": )" + std::to_string(runID) + "}"); response.send(Pistache::Http::Code::Ok,
R"({"runID": )" + std::to_string(runID) + "}");
} }
void Server::handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) { void Server::handleGetDAGRuns(const Pistache::Rest::Request &request,
if (!handleAuth(request, response)) return; Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request, response))
return;
auto dagRuns = logger_.getDAGs(0); auto dagRuns = logger_.getDAGs(0);
std::stringstream ss; std::stringstream ss;
ss << '['; ss << '[';
bool first = true; bool first = true;
for (const auto &run: dagRuns) { for (const auto &run : dagRuns) {
if (first) { if (first) {
first = false; first = false;
} else { }
else {
ss << ", "; ss << ", ";
} }
ss << " {" ss << " {"
<< R"("runID": )" << run.runID << ',' << R"("runID": )" << run.runID << ',' << R"("name": )"
<< R"("name": )" << std::quoted(run.name) << "," << std::quoted(run.name) << ","
<< R"("startTime": )" << std::quoted(timePointToString(run.startTime)) << ',' << R"("startTime": )" << std::quoted(timePointToString(run.startTime))
<< R"("lastUpdate": )" << std::quoted(timePointToString(run.lastUpdate)) << ',' << ',' << R"("lastUpdate": )"
<< std::quoted(timePointToString(run.lastUpdate)) << ','
<< R"("taskCounts": {)"; << R"("taskCounts": {)";
bool firstState = true; bool firstState = true;
for (const auto &[state, count]: run.taskStateCounts) { for (const auto &[state, count] : run.taskStateCounts) {
if (firstState) { if (firstState) {
firstState = false; firstState = false;
} else { }
else {
ss << ", "; ss << ", ";
} }
ss << std::quoted(state._to_string()) << ':' << count; ss << std::quoted(state._to_string()) << ':' << count;
@@ -185,24 +204,34 @@ namespace daggy {
response.send(Pistache::Http::Code::Ok, ss.str()); response.send(Pistache::Http::Code::Ok, ss.str());
} }
void Server::handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) { void Server::handleGetDAGRun(const Pistache::Rest::Request &request,
if (!handleAuth(request, response)) return; Pistache::Http::ResponseWriter response)
if (!request.hasParam(":runID")) { REQ_ERROR(Not_Found, "No runID provided in URL"); } {
if (!handleAuth(request, response))
return;
if (!request.hasParam(":runID")) {
REQ_ERROR(Not_Found, "No runID provided in URL");
}
DAGRunID runID = request.param(":runID").as<size_t>(); DAGRunID runID = request.param(":runID").as<size_t>();
auto run = logger_.getDAGRun(runID); auto run = logger_.getDAGRun(runID);
bool first = true; bool first = true;
std::stringstream ss; std::stringstream ss;
ss << "{" ss << "{"
<< R"("runID": )" << runID << ',' << R"("runID": )" << runID << ',' << R"("name": )"
<< R"("name": )" << std::quoted(run.name) << ',' << std::quoted(run.name) << ',' << R"("tasks": )"
<< R"("tasks": )" << tasksToJSON(run.tasks) << ','; << tasksToJSON(run.tasks) << ',';
// task run states // task run states
ss << R"("taskStates": { )"; ss << R"("taskStates": { )";
first = true; first = true;
for (const auto &[name, state]: run.taskRunStates) { for (const auto &[name, state] : run.taskRunStates) {
if (first) { first = false; } else { ss << ','; } if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(name) << ": " << std::quoted(state._to_string()); ss << std::quoted(name) << ": " << std::quoted(state._to_string());
} }
ss << "},"; ss << "},";
@@ -210,20 +239,30 @@ namespace daggy {
// Attempt records // Attempt records
first = true; first = true;
ss << R"("taskAttempts": { )"; ss << R"("taskAttempts": { )";
for (const auto &[taskName, attempts]: run.taskAttempts) { for (const auto &[taskName, attempts] : run.taskAttempts) {
if (first) { first = false; } else { ss << ','; } if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(taskName) << ": ["; ss << std::quoted(taskName) << ": [";
bool firstAttempt = true; bool firstAttempt = true;
for (const auto &attempt: attempts) { for (const auto &attempt : attempts) {
if (firstAttempt) { firstAttempt = false; } else { ss << ','; } if (firstAttempt) {
ss << '{' firstAttempt = false;
<< R"("startTime":)" << std::quoted(timePointToString(attempt.startTime)) << ',' }
<< R"("stopTime":)" << std::quoted(timePointToString(attempt.stopTime)) << ',' else {
<< R"("rc":)" << attempt.rc << ',' ss << ',';
<< R"("outputLog":)" << std::quoted(attempt.outputLog) << ',' }
<< R"("errorLog":)" << std::quoted(attempt.errorLog) << ',' ss << '{' << R"("startTime":)"
<< R"("executorLog":)" << std::quoted(attempt.executorLog) << std::quoted(timePointToString(attempt.startTime)) << ','
<< '}'; << R"("stopTime":)"
<< std::quoted(timePointToString(attempt.stopTime)) << ','
<< R"("rc":)" << attempt.rc << ',' << R"("outputLog":)"
<< std::quoted(attempt.outputLog) << ',' << R"("errorLog":)"
<< std::quoted(attempt.errorLog) << ',' << R"("executorLog":)"
<< std::quoted(attempt.executorLog) << '}';
} }
ss << ']'; ss << ']';
} }
@@ -232,12 +271,16 @@ namespace daggy {
// DAG state changes // DAG state changes
first = true; first = true;
ss << R"("dagStateChanges": [ )"; ss << R"("dagStateChanges": [ )";
for (const auto &change: run.dagStateChanges) { for (const auto &change : run.dagStateChanges) {
if (first) { first = false; } else { ss << ','; } if (first) {
ss << '{' first = false;
<< R"("newState": )" << std::quoted(change.newState._to_string()) << ',' }
<< R"("time": )" << std::quoted(timePointToString(change.time)) else {
<< '}'; ss << ',';
}
ss << '{' << R"("newState": )"
<< std::quoted(change.newState._to_string()) << ',' << R"("time": )"
<< std::quoted(timePointToString(change.time)) << '}';
} }
ss << "]"; ss << "]";
ss << '}'; ss << '}';
@@ -245,16 +288,21 @@ namespace daggy {
response.send(Pistache::Http::Code::Ok, ss.str()); response.send(Pistache::Http::Code::Ok, ss.str());
} }
void Server::handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) { void Server::handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
response.send(Pistache::Http::Code::Ok, "Ya like DAGs?"); response.send(Pistache::Http::Code::Ok, "Ya like DAGs?");
} }
/* /*
* handleAuth will check any auth methods and handle any responses in the case of failed auth. If it returns * handleAuth will check any auth methods and handle any responses in the case
* false, callers should cease handling the response * of failed auth. If it returns false, callers should cease handling the
* response
*/ */
bool Server::handleAuth(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter &response) { bool Server::handleAuth(const Pistache::Rest::Request &request,
(void) response; Pistache::Http::ResponseWriter &response)
{
(void)response;
return true; return true;
} }
} } // namespace daggy

View File

@@ -1,13 +1,14 @@
#include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp>
#include <future> #include <future>
#include <iomanip> #include <iomanip>
#include <daggy/Utilities.hpp>
#include <daggy/Serialization.hpp>
using namespace std::chrono_literals; using namespace std::chrono_literals;
namespace daggy { namespace daggy {
std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement) { std::string globalSub(std::string string, const std::string &pattern,
const std::string &replacement)
{
size_t pos = string.find(pattern); size_t pos = string.find(pattern);
while (pos != std::string::npos) { while (pos != std::string::npos) {
string.replace(pos, pattern.size(), replacement); string.replace(pos, pattern.size(), replacement);
@@ -16,27 +17,32 @@ namespace daggy {
return string; return string;
} }
std::vector<std::vector<std::string>> std::vector<std::vector<std::string>> interpolateValues(
interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values) { const std::vector<std::string> &raw, const ConfigValues &values)
{
std::vector<std::vector<std::string>> cooked{{}}; std::vector<std::vector<std::string>> cooked{{}};
for (const auto &part: raw) { for (const auto &part : raw) {
std::vector<std::string> expandedPart{part}; std::vector<std::string> expandedPart{part};
// Find all values of parameters, and expand them // Find all values of parameters, and expand them
for (const auto &[paramRaw, paramValue]: values) { for (const auto &[paramRaw, paramValue] : values) {
std::string param = "{{" + paramRaw + "}}"; std::string param = "{{" + paramRaw + "}}";
auto pos = part.find(param); auto pos = part.find(param);
if (pos == std::string::npos) continue; if (pos == std::string::npos)
continue;
std::vector<std::string> newExpandedPart; std::vector<std::string> newExpandedPart;
if (std::holds_alternative<std::string>(paramValue)) { if (std::holds_alternative<std::string>(paramValue)) {
for (auto &cmd: expandedPart) { for (auto &cmd : expandedPart) {
newExpandedPart.push_back(globalSub(cmd, param, std::get<std::string>(paramValue))); newExpandedPart.push_back(
globalSub(cmd, param, std::get<std::string>(paramValue)));
} }
} else { }
for (const auto &val: std::get<std::vector<std::string>>(paramValue)) { else {
for (auto cmd: expandedPart) { for (const auto &val :
std::get<std::vector<std::string>>(paramValue)) {
for (auto cmd : expandedPart) {
newExpandedPart.push_back(globalSub(cmd, param, val)); newExpandedPart.push_back(globalSub(cmd, param, val));
} }
} }
@@ -46,8 +52,8 @@ namespace daggy {
} }
std::vector<std::vector<std::string>> newCommands; std::vector<std::vector<std::string>> newCommands;
for (const auto &newPart: expandedPart) { for (const auto &newPart : expandedPart) {
for (auto cmd: cooked) { for (auto cmd : cooked) {
cmd.push_back(newPart); cmd.push_back(newPart);
newCommands.emplace_back(cmd); newCommands.emplace_back(cmd);
} }
@@ -57,17 +63,18 @@ namespace daggy {
return cooked; return cooked;
} }
TaskSet TaskSet expandTaskSet(const TaskSet &tasks,
expandTaskSet(const TaskSet &tasks,
executors::task::TaskExecutor &executor, executors::task::TaskExecutor &executor,
const ConfigValues &interpolatedValues) { const ConfigValues &interpolatedValues)
{
// Expand the tasks first // Expand the tasks first
TaskSet newTaskSet; TaskSet newTaskSet;
for (const auto &[baseName, task]: tasks) { for (const auto &[baseName, task] : tasks) {
executor.validateTaskParameters(task.job); executor.validateTaskParameters(task.job);
const auto newJobs = executor.expandTaskParameters(task.job, interpolatedValues); const auto newJobs =
executor.expandTaskParameters(task.job, interpolatedValues);
size_t i = 0; size_t i = 0;
for (const auto &newJob: newJobs) { for (const auto &newJob : newJobs) {
Task newTask{task}; Task newTask{task};
newTask.job = newJob; newTask.job = newJob;
newTaskSet.emplace(baseName + "_" + std::to_string(i), newTask); newTaskSet.emplace(baseName + "_" + std::to_string(i), newTask);
@@ -77,30 +84,34 @@ namespace daggy {
return newTaskSet; return newTaskSet;
} }
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks)
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks) { {
// Add the missing vertices // Add the missing vertices
for (const auto &[name, task]: tasks) { for (const auto &[name, task] : tasks) {
dag.addVertex(name, task); dag.addVertex(name, task);
} }
// Add edges // Add edges
for (const auto &[name, task]: tasks) { for (const auto &[name, task] : tasks) {
dag.addEdgeIf(name, [&](const auto &v) { return task.children.count(v.data.definedName) > 0; }); dag.addEdgeIf(name, [&](const auto &v) {
return task.children.count(v.data.definedName) > 0;
});
} }
if (! dag.isValid()) { if (!dag.isValid()) {
throw std::runtime_error("DAG contains a cycle"); throw std::runtime_error("DAG contains a cycle");
} }
} }
TaskDAG buildDAGFromTasks(TaskSet &tasks, TaskDAG buildDAGFromTasks(
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates) { TaskSet &tasks,
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates)
{
TaskDAG dag; TaskDAG dag;
updateDAGFromTasks(dag, tasks); updateDAGFromTasks(dag, tasks);
// Replay any updates // Replay any updates
for (const auto &update: updates) { for (const auto &update : updates) {
switch (update.newState) { switch (update.newState) {
case RunState::RUNNING: case RunState::RUNNING:
case RunState::RETRY: case RunState::RETRY:
@@ -118,12 +129,10 @@ namespace daggy {
return dag; return dag;
} }
TaskDAG runDAG(DAGRunID runID, TaskDAG runDAG(DAGRunID runID, executors::task::TaskExecutor &executor,
executors::task::TaskExecutor &executor, loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
loggers::dag_run::DAGRunLogger &logger, const ConfigValues parameters)
TaskDAG dag, {
const ConfigValues parameters
) {
logger.updateDAGRunState(runID, RunState::RUNNING); logger.updateDAGRunState(runID, RunState::RUNNING);
std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks; std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks;
@@ -133,7 +142,7 @@ namespace daggy {
size_t errored = 0; size_t errored = 0;
while (!dag.allVisited()) { while (!dag.allVisited()) {
// Check for any completed tasks // Check for any completed tasks
for (auto &[taskName, fut]: runningTasks) { for (auto &[taskName, fut] : runningTasks) {
if (fut.valid()) { if (fut.valid()) {
auto attempt = fut.get(); auto attempt = fut.get();
logger.logTaskAttempt(runID, taskName, attempt); logger.logTaskAttempt(runID, taskName, attempt);
@@ -145,20 +154,21 @@ namespace daggy {
// Parse the output and update the DAGs // Parse the output and update the DAGs
try { try {
auto newTasks = expandTaskSet(tasksFromJSON(attempt.outputLog), auto newTasks = expandTaskSet(tasksFromJSON(attempt.outputLog),
executor, executor, parameters);
parameters
);
updateDAGFromTasks(dag, newTasks); updateDAGFromTasks(dag, newTasks);
for (const auto &[ntName, ntTask]: newTasks) { for (const auto &[ntName, ntTask] : newTasks) {
logger.addTask(runID, ntName, ntTask); logger.addTask(runID, ntName, ntTask);
dag.addEdge(taskName, ntName); dag.addEdge(taskName, ntName);
task.children.insert(ntName); task.children.insert(ntName);
} }
logger.updateTask(runID, taskName, task); logger.updateTask(runID, taskName, task);
} catch (std::exception &e) { }
logger.logTaskAttempt(runID, taskName, catch (std::exception &e) {
AttemptRecord{.executorLog = logger.logTaskAttempt(
runID, taskName,
AttemptRecord{
.executorLog =
std::string{"Failed to parse JSON output: "} + std::string{"Failed to parse JSON output: "} +
e.what()}); e.what()});
logger.updateTaskState(runID, taskName, RunState::ERRORED); logger.updateTaskState(runID, taskName, RunState::ERRORED);
@@ -167,13 +177,15 @@ namespace daggy {
} }
dag.completeVisit(taskName); dag.completeVisit(taskName);
--running; --running;
} else { }
else {
// RC isn't 0 // RC isn't 0
if (taskAttemptCounts[taskName] <= task.maxRetries) { if (taskAttemptCounts[taskName] <= task.maxRetries) {
logger.updateTaskState(runID, taskName, RunState::RETRY); logger.updateTaskState(runID, taskName, RunState::RETRY);
runningTasks[taskName] = executor.execute(taskName, task); runningTasks[taskName] = executor.execute(taskName, task);
++taskAttemptCounts[taskName]; ++taskAttemptCounts[taskName];
} else { }
else {
logger.updateTaskState(runID, taskName, RunState::ERRORED); logger.updateTaskState(runID, taskName, RunState::ERRORED);
++errored; ++errored;
} }
@@ -181,7 +193,8 @@ namespace daggy {
} }
} }
// Add all remaining tasks in a task queue to avoid dominating the thread pool // Add all remaining tasks in a task queue to avoid dominating the thread
// pool
auto t = dag.visitNext(); auto t = dag.visitNext();
while (t.has_value()) { while (t.has_value()) {
// Schedule the task to run // Schedule the task to run
@@ -194,7 +207,8 @@ namespace daggy {
++running; ++running;
auto nextTask = dag.visitNext(); auto nextTask = dag.visitNext();
if (not nextTask.has_value()) break; if (not nextTask.has_value())
break;
t.emplace(nextTask.value()); t.emplace(nextTask.value());
} }
if (running > 0 and errored == running) { if (running > 0 and errored == running) {
@@ -211,9 +225,10 @@ namespace daggy {
return dag; return dag;
} }
std::ostream &operator<<(std::ostream &os, const TimePoint &tp) { std::ostream &operator<<(std::ostream &os, const TimePoint &tp)
{
auto t_c = Clock::to_time_t(tp); auto t_c = Clock::to_time_t(tp);
os << std::put_time(std::localtime(&t_c), "%Y-%m-%d %H:%M:%S %Z"); os << std::put_time(std::localtime(&t_c), "%Y-%m-%d %H:%M:%S %Z");
return os; return os;
} }
} } // namespace daggy

View File

@@ -1,27 +1,32 @@
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/Utilities.hpp>
#include <fcntl.h> #include <fcntl.h>
#include <poll.h>
#include <unistd.h> #include <unistd.h>
#include <wait.h> #include <wait.h>
#include <poll.h>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
using namespace daggy::executors::task; using namespace daggy::executors::task;
std::string slurp(int fd) { std::string slurp(int fd)
{
std::string result; std::string result;
const ssize_t BUFFER_SIZE = 4096; const ssize_t BUFFER_SIZE = 4096;
char buffer[BUFFER_SIZE]; char buffer[BUFFER_SIZE];
struct pollfd pfd{.fd = fd, .events = POLLIN, .revents = 0}; struct pollfd pfd
{
.fd = fd, .events = POLLIN, .revents = 0
};
poll(&pfd, 1, 1); poll(&pfd, 1, 1);
while (pfd.revents & POLLIN) { while (pfd.revents & POLLIN) {
ssize_t bytes = read(fd, buffer, BUFFER_SIZE); ssize_t bytes = read(fd, buffer, BUFFER_SIZE);
if (bytes == 0) { if (bytes == 0) {
break; break;
} else { }
else {
result.append(buffer, bytes); result.append(buffer, bytes);
} }
pfd.revents = 0; pfd.revents = 0;
@@ -31,14 +36,14 @@ std::string slurp(int fd) {
return result; return result;
} }
std::future<daggy::AttemptRecord> ForkingTaskExecutor::execute(
std::future<daggy::AttemptRecord> const std::string &taskName, const Task &task)
ForkingTaskExecutor::execute(const std::string &taskName, const Task &task) { {
return tp_.addTask([this, task](){return this->runTask(task);}); return tp_.addTask([this, task]() { return this->runTask(task); });
} }
daggy::AttemptRecord daggy::AttemptRecord ForkingTaskExecutor::runTask(const Task &task)
ForkingTaskExecutor::runTask(const Task & task) { {
AttemptRecord rec; AttemptRecord rec;
rec.startTime = Clock::now(); rec.startTime = Clock::now();
@@ -46,28 +51,30 @@ ForkingTaskExecutor::runTask(const Task & task) {
// Need to convert the strings // Need to convert the strings
std::vector<char *> argv; std::vector<char *> argv;
const auto command = std::get<Command>(task.job.at("command")); const auto command = std::get<Command>(task.job.at("command"));
std::transform(command.begin(), std::transform(
command.end(), command.begin(), command.end(), std::back_inserter(argv),
std::back_inserter(argv), [](const std::string &s) { return const_cast<char *>(s.c_str()); });
[](const std::string & s) {
return const_cast<char *>(s.c_str());
});
argv.push_back(nullptr); argv.push_back(nullptr);
// Create the pipe // Create the pipe
int stdoutPipe[2]; int stdoutPipe[2];
int pipeRC = pipe2(stdoutPipe, O_DIRECT); int pipeRC = pipe2(stdoutPipe, O_DIRECT);
if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stdout"); if (pipeRC != 0)
throw std::runtime_error("Unable to create pipe for stdout");
int stderrPipe[2]; int stderrPipe[2];
pipeRC = pipe2(stderrPipe, O_DIRECT); pipeRC = pipe2(stderrPipe, O_DIRECT);
if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stderr"); if (pipeRC != 0)
throw std::runtime_error("Unable to create pipe for stderr");
pid_t child = fork(); pid_t child = fork();
if (child < 0) { if (child < 0) {
throw std::runtime_error("Unable to fork child"); throw std::runtime_error("Unable to fork child");
} else if (child == 0) { // child }
while ((dup2(stdoutPipe[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {} else if (child == 0) { // child
while ((dup2(stderrPipe[1], STDERR_FILENO) == -1) && (errno == EINTR)) {} while ((dup2(stdoutPipe[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {
}
while ((dup2(stderrPipe[1], STDERR_FILENO) == -1) && (errno == EINTR)) {
}
close(stdoutPipe[0]); close(stdoutPipe[0]);
close(stderrPipe[0]); close(stderrPipe[0]);
execvp(argv[0], argv.data()); execvp(argv[0], argv.data());
@@ -75,8 +82,14 @@ ForkingTaskExecutor::runTask(const Task & task) {
} }
std::atomic<bool> running = true; std::atomic<bool> running = true;
std::thread stdoutReader([&]() { while (running) rec.outputLog.append(slurp(stdoutPipe[0])); }); std::thread stdoutReader([&]() {
std::thread stderrReader([&]() { while (running) rec.errorLog.append(slurp(stderrPipe[0])); }); while (running)
rec.outputLog.append(slurp(stdoutPipe[0]));
});
std::thread stderrReader([&]() {
while (running)
rec.errorLog.append(slurp(stderrPipe[0]));
});
int rc = 0; int rc = 0;
waitpid(child, &rc, 0); waitpid(child, &rc, 0);
@@ -85,7 +98,8 @@ ForkingTaskExecutor::runTask(const Task & task) {
rec.stopTime = Clock::now(); rec.stopTime = Clock::now();
if (WIFEXITED(rc)) { if (WIFEXITED(rc)) {
rec.rc = WEXITSTATUS(rc); rec.rc = WEXITSTATUS(rc);
} else { }
else {
rec.rc = -1; rec.rc = -1;
} }
@@ -98,21 +112,25 @@ ForkingTaskExecutor::runTask(const Task & task) {
return rec; return rec;
} }
bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job) { bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
auto it = job.find("command"); auto it = job.find("command");
if (it == job.end()) if (it == job.end())
throw std::runtime_error(R"(job does not have a "command" argument)"); throw std::runtime_error(R"(job does not have a "command" argument)");
if (!std::holds_alternative<Command>(it->second)) if (!std::holds_alternative<Command>(it->second))
throw std::runtime_error(R"(taskParameter's "command" must be an array of strings)"); throw std::runtime_error(
R"(taskParameter's "command" must be an array of strings)");
return true; return true;
} }
std::vector<daggy::ConfigValues> std::vector<daggy::ConfigValues> ForkingTaskExecutor::expandTaskParameters(
ForkingTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) { const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues; std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command")); const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) { for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job}; ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand; newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand); newValues.emplace_back(newCommand);

View File

@@ -1,37 +1,40 @@
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/Utilities.hpp> #include <daggy/Utilities.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
namespace daggy::executors::task { namespace daggy::executors::task {
std::future<daggy::AttemptRecord> std::future<daggy::AttemptRecord> NoopTaskExecutor::execute(
NoopTaskExecutor::execute(const std::string &taskName, const Task &task) { const std::string &taskName, const Task &task)
{
std::promise<daggy::AttemptRecord> promise; std::promise<daggy::AttemptRecord> promise;
auto ts = Clock::now(); auto ts = Clock::now();
promise.set_value(AttemptRecord{ promise.set_value(AttemptRecord{.startTime = ts,
.startTime = ts,
.stopTime = ts, .stopTime = ts,
.rc = 0, .rc = 0,
.executorLog = taskName, .executorLog = taskName,
.outputLog = taskName, .outputLog = taskName,
.errorLog = taskName .errorLog = taskName});
});
return promise.get_future(); return promise.get_future();
} }
bool NoopTaskExecutor::validateTaskParameters(const ConfigValues &job) { bool NoopTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
auto it = job.find("command"); auto it = job.find("command");
if (it == job.end()) if (it == job.end())
throw std::runtime_error(R"(job does not have a "command" argument)"); throw std::runtime_error(R"(job does not have a "command" argument)");
if (!std::holds_alternative<Command>(it->second)) if (!std::holds_alternative<Command>(it->second))
throw std::runtime_error(R"(taskParameter's "command" must be an array of strings)"); throw std::runtime_error(
R"(taskParameter's "command" must be an array of strings)");
return true; return true;
} }
std::vector<daggy::ConfigValues> std::vector<daggy::ConfigValues> NoopTaskExecutor::expandTaskParameters(
NoopTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) { const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues; std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command")); const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) { for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job}; ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand; newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand); newValues.emplace_back(newCommand);
@@ -39,4 +42,4 @@ namespace daggy::executors::task {
return newValues; return newValues;
} }
} } // namespace daggy::executors::task

View File

@@ -1,32 +1,32 @@
#include <iterator> #include <iterator>
#include <stdexcept> #include <stdexcept>
#ifdef DAGGY_ENABLE_SLURM #ifdef DAGGY_ENABLE_SLURM
#include <random> #include <slurm/slurm.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <random>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <slurm/slurm.h>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
#include <daggy/Utilities.hpp>
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace daggy::executors::task { namespace daggy::executors::task {
std::string getUniqueTag(size_t nChars = 6) { std::string getUniqueTag(size_t nChars = 6)
{
std::string result(nChars, '\0'); std::string result(nChars, '\0');
static std::random_device dev; static std::random_device dev;
static std::mt19937 rng(dev()); static std::mt19937 rng(dev());
std::uniform_int_distribution<int> dist(0, 61); std::uniform_int_distribution<int> dist(0, 61);
const char *v = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; const char *v =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (size_t i = 0; i < nChars; i++) { for (size_t i = 0; i < nChars; i++) {
result[i] = v[dist(rng)]; result[i] = v[dist(rng)];
@@ -34,8 +34,10 @@ namespace daggy::executors::task {
return result; return result;
} }
void readAndClean(const fs::path & fn, std::string & dest) { void readAndClean(const fs::path &fn, std::string &dest)
if (! fs::exists(fn)) return; {
if (!fs::exists(fn))
return;
std::ifstream ifh; std::ifstream ifh;
ifh.open(fn); ifh.open(fn);
@@ -50,7 +52,8 @@ namespace daggy::executors::task {
: running_(true) : running_(true)
, monitorWorker_(&SlurmTaskExecutor::monitor, this) , monitorWorker_(&SlurmTaskExecutor::monitor, this)
{ {
std::string priority = "SLURM_PRIO_PROCESS=" + std::to_string(getpriority(PRIO_PROCESS, 0)); std::string priority =
"SLURM_PRIO_PROCESS=" + std::to_string(getpriority(PRIO_PROCESS, 0));
std::string submitDir = "SLURM_SUBMIT_DIR=" + fs::current_path().string(); std::string submitDir = "SLURM_SUBMIT_DIR=" + fs::current_path().string();
const size_t MAX_HOSTNAME_LENGTH = 50; const size_t MAX_HOSTNAME_LENGTH = 50;
@@ -63,10 +66,8 @@ namespace daggy::executors::task {
umask(mask); // Restore the old mask umask(mask); // Restore the old mask
std::stringstream ss; std::stringstream ss;
ss << "SLURM_UMASK=0" ss << "SLURM_UMASK=0" << uint32_t{((mask >> 6) & 07)}
<< uint32_t{((mask >> 6) & 07)} << uint32_t{((mask >> 3) & 07)} << uint32_t{(mask & 07)};
<< uint32_t{((mask >> 3) & 07)}
<< uint32_t{(mask & 07)};
// Set some environment variables // Set some environment variables
putenv(const_cast<char *>(priority.c_str())); putenv(const_cast<char *>(priority.c_str()));
@@ -75,26 +76,22 @@ namespace daggy::executors::task {
putenv(const_cast<char *>(ss.str().c_str())); putenv(const_cast<char *>(ss.str().c_str()));
} }
SlurmTaskExecutor::~SlurmTaskExecutor() { SlurmTaskExecutor::~SlurmTaskExecutor()
{
running_ = false; running_ = false;
monitorWorker_.join(); monitorWorker_.join();
} }
// Validates the job to ensure that all required values are set and are of the right type, // Validates the job to ensure that all required values are set and are of the
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job) { // right type,
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
const std::unordered_set<std::string> requiredFields{ const std::unordered_set<std::string> requiredFields{
"minCPUs", "minCPUs", "minMemoryMB", "minTmpDiskMB",
"minMemoryMB", "priority", "timeLimitSeconds", "userID",
"minTmpDiskMB", "workDir", "tmpDir", "command"};
"priority",
"timeLimitSeconds",
"userID",
"workDir",
"tmpDir",
"command"
};
for (const auto &requiredField: requiredFields) { for (const auto &requiredField : requiredFields) {
if (job.count(requiredField) == 0) { if (job.count(requiredField) == 0) {
throw std::runtime_error("Missing field " + requiredField); throw std::runtime_error("Missing field " + requiredField);
} }
@@ -102,12 +99,14 @@ namespace daggy::executors::task {
return true; return true;
} }
std::vector<ConfigValues> std::vector<ConfigValues> SlurmTaskExecutor::expandTaskParameters(
SlurmTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) { const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues; std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command")); const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) { for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job}; ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand; newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand); newValues.emplace_back(newCommand);
@@ -116,8 +115,9 @@ namespace daggy::executors::task {
return newValues; return newValues;
} }
std::future<AttemptRecord> std::future<AttemptRecord> SlurmTaskExecutor::execute(
SlurmTaskExecutor::execute(const std::string &taskName, const Task &task) { const std::string &taskName, const Task &task)
{
std::stringstream executorLog; std::stringstream executorLog;
const auto &job = task.job; const auto &job = task.job;
@@ -130,13 +130,11 @@ namespace daggy::executors::task {
// Convert command to argc / argv // Convert command to argc / argv
std::vector<char *> argv{nullptr}; std::vector<char *> argv{nullptr};
const auto command = std::get<std::vector<std::string>>(task.job.at("command")); const auto command =
std::transform(command.begin(), std::get<std::vector<std::string>>(task.job.at("command"));
command.end(), std::transform(
std::back_inserter(argv), command.begin(), command.end(), std::back_inserter(argv),
[](const std::string & s) { [](const std::string &s) { return const_cast<char *>(s.c_str()); });
return const_cast<char *>(s.c_str());
});
char empty[] = ""; char empty[] = "";
char *env[1]; char *env[1];
@@ -156,10 +154,12 @@ namespace daggy::executors::task {
jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs"))); jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs")));
jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB"))); jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB")));
jd.pn_min_tmp_disk = std::stoi(std::get<std::string>(job.at("minTmpDiskMB"))); jd.pn_min_tmp_disk =
std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
jd.priority = std::stoi(std::get<std::string>(job.at("priority"))); jd.priority = std::stoi(std::get<std::string>(job.at("priority")));
jd.shared = 0; jd.shared = 0;
jd.time_limit = std::stoi(std::get<std::string>(job.at("timeLimitSeconds"))); jd.time_limit =
std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
jd.min_nodes = 1; jd.min_nodes = 1;
jd.user_id = std::stoi(std::get<std::string>(job.at("userID"))); jd.user_id = std::stoi(std::get<std::string>(job.at("userID")));
jd.argv = argv.data(); jd.argv = argv.data();
@@ -183,8 +183,7 @@ namespace daggy::executors::task {
error_code = slurm_submit_batch_job(&jd, &resp_msg); error_code = slurm_submit_batch_job(&jd, &resp_msg);
if (error_code) { if (error_code) {
std::stringstream ss; std::stringstream ss;
ss << "Unable to submit slurm job: " ss << "Unable to submit slurm job: " << slurm_strerror(error_code);
<< slurm_strerror(error_code);
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
} }
@@ -193,33 +192,33 @@ namespace daggy::executors::task {
slurm_free_submit_response_response_msg(resp_msg); slurm_free_submit_response_response_msg(resp_msg);
std::lock_guard<std::mutex> lock(promiseGuard_); std::lock_guard<std::mutex> lock(promiseGuard_);
Job newJob{ Job newJob{.prom{}, .stdoutFile = stdoutFile, .stderrFile = stderrFile};
.prom{},
.stdoutFile = stdoutFile,
.stderrFile = stderrFile
};
auto fut = newJob.prom.get_future(); auto fut = newJob.prom.get_future();
runningJobs_.emplace(jobID, std::move(newJob)); runningJobs_.emplace(jobID, std::move(newJob));
return fut; return fut;
} }
void SlurmTaskExecutor::monitor() { void SlurmTaskExecutor::monitor()
{
std::unordered_set<size_t> resolvedJobs; std::unordered_set<size_t> resolvedJobs;
while (running_) { while (running_) {
{ {
std::lock_guard<std::mutex> lock(promiseGuard_); std::lock_guard<std::mutex> lock(promiseGuard_);
for (auto & [jobID, job] : runningJobs_) { for (auto &[jobID, job] : runningJobs_) {
job_info_msg_t * jobStatus; job_info_msg_t *jobStatus;
int error_code = slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL); int error_code =
if (error_code != SLURM_SUCCESS) continue; slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL);
if (error_code != SLURM_SUCCESS)
continue;
uint32_t idx = jobStatus->record_count; uint32_t idx = jobStatus->record_count;
if (idx == 0) continue; if (idx == 0)
continue;
idx--; idx--;
const slurm_job_info_t & jobInfo = jobStatus->job_array[idx]; const slurm_job_info_t &jobInfo = jobStatus->job_array[idx];
AttemptRecord record; AttemptRecord record;
switch(jobInfo.job_state) { switch (jobInfo.job_state) {
case JOB_PENDING: case JOB_PENDING:
case JOB_SUSPENDED: case JOB_SUSPENDED:
case JOB_RUNNING: case JOB_RUNNING:
@@ -243,7 +242,8 @@ namespace daggy::executors::task {
record.executorLog = "Job terminated due to pre-emption.\n"; record.executorLog = "Job terminated due to pre-emption.\n";
break; break;
case JOB_BOOT_FAIL: /* terminated due to node boot failure */ case JOB_BOOT_FAIL: /* terminated due to node boot failure */
record.executorLog = "Job failed to run due to failure of compute node to boot.\n"; record.executorLog =
"Job failed to run due to failure of compute node to boot.\n";
break; break;
case JOB_DEADLINE: /* terminated on deadline */ case JOB_DEADLINE: /* terminated on deadline */
record.executorLog = "Job terminated due to deadline.\n"; record.executorLog = "Job terminated due to deadline.\n";
@@ -270,5 +270,5 @@ namespace daggy::executors::task {
std::this_thread::sleep_for(std::chrono::milliseconds(250)); std::this_thread::sleep_for(std::chrono::milliseconds(250));
} }
} }
} } // namespace daggy::executors::task
#endif #endif

View File

@@ -1,45 +1,59 @@
#include <fstream>
#include <iomanip>
#include <enum.h> #include <enum.h>
#include <daggy/loggers/dag_run/FileSystemLogger.hpp>
#include <daggy/Serialization.hpp> #include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp> #include <daggy/Utilities.hpp>
#include <daggy/loggers/dag_run/FileSystemLogger.hpp>
#include <fstream>
#include <iomanip>
namespace fs = std::filesystem; namespace fs = std::filesystem;
using namespace daggy::loggers::dag_run; using namespace daggy::loggers::dag_run;
namespace daggy { namespace daggy {
inline const fs::path FileSystemLogger::getCurrentPath() const { return root_ / "current"; } inline const fs::path FileSystemLogger::getCurrentPath() const
{
return root_ / "current";
}
inline const fs::path FileSystemLogger::getRunsRoot() const { return root_ / "runs"; } inline const fs::path FileSystemLogger::getRunsRoot() const
{
return root_ / "runs";
}
inline const fs::path FileSystemLogger::getRunRoot(DAGRunID runID) const { inline const fs::path FileSystemLogger::getRunRoot(DAGRunID runID) const
{
return getRunsRoot() / std::to_string(runID); return getRunsRoot() / std::to_string(runID);
} }
FileSystemLogger::FileSystemLogger(fs::path root) FileSystemLogger::FileSystemLogger(fs::path root)
: root_(root), nextRunID_(0) { : root_(root)
const std::vector<fs::path> reqPaths{root_, getCurrentPath(), getRunsRoot()}; , nextRunID_(0)
for (const auto &path: reqPaths) { {
if (!fs::exists(path)) { fs::create_directories(path); } const std::vector<fs::path> reqPaths{root_, getCurrentPath(),
getRunsRoot()};
for (const auto &path : reqPaths) {
if (!fs::exists(path)) {
fs::create_directories(path);
}
} }
// Get the next run ID // Get the next run ID
for (auto &dir: fs::directory_iterator(getRunsRoot())) { for (auto &dir : fs::directory_iterator(getRunsRoot())) {
try { try {
size_t runID = std::stoull(dir.path().stem()); size_t runID = std::stoull(dir.path().stem());
if (runID > nextRunID_) nextRunID_ = runID + 1; if (runID > nextRunID_)
} catch (std::exception &e) { nextRunID_ = runID + 1;
}
catch (std::exception &e) {
continue; continue;
} }
} }
} }
// Execution // Execution
DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskSet &tasks) { DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskSet &tasks)
{
DAGRunID runID = nextRunID_++; DAGRunID runID = nextRunID_++;
// TODO make this threadsafe // TODO make this threadsafe
@@ -51,12 +65,14 @@ namespace daggy {
fs::create_directories(runRoot); fs::create_directories(runRoot);
// Create meta.json with DAGRun Name and task definitions // Create meta.json with DAGRun Name and task definitions
std::ofstream ofh(runRoot / "metadata.json", std::ios::trunc | std::ios::binary); std::ofstream ofh(runRoot / "metadata.json",
ofh << R"({ "name": )" << std::quoted(name) << R"(, "tasks": )" << tasksToJSON(tasks) << "}\n"; std::ios::trunc | std::ios::binary);
ofh << R"({ "name": )" << std::quoted(name) << R"(, "tasks": )"
<< tasksToJSON(tasks) << "}\n";
ofh.close(); ofh.close();
// Task directories // Task directories
for (const auto &[name, task]: tasks) { for (const auto &[name, task] : tasks) {
auto taskDir = runRoot / name; auto taskDir = runRoot / name;
fs::create_directories(taskDir); fs::create_directories(taskDir);
std::ofstream ofh(taskDir / "states.csv"); std::ofstream ofh(taskDir / "states.csv");
@@ -65,19 +81,25 @@ namespace daggy {
return runID; return runID;
} }
void FileSystemLogger::updateDAGRunState(DAGRunID dagRunID, RunState state) { void FileSystemLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
std::ofstream ofh(getRunRoot(dagRunID) / "states.csv", std::ios::binary | std::ios::app); {
ofh << std::quoted(timePointToString(Clock::now())) << ',' << state._to_string() << '\n'; std::ofstream ofh(getRunRoot(dagRunID) / "states.csv",
std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ','
<< state._to_string() << '\n';
ofh.flush(); ofh.flush();
ofh.close(); ofh.close();
} }
void void FileSystemLogger::logTaskAttempt(DAGRunID dagRunID,
FileSystemLogger::logTaskAttempt(DAGRunID dagRunID, const std::string &taskName, const std::string &taskName,
const AttemptRecord &attempt) { const AttemptRecord &attempt)
{
auto taskRoot = getRunRoot(dagRunID) / taskName; auto taskRoot = getRunRoot(dagRunID) / taskName;
size_t i = 1; size_t i = 1;
while (fs::exists(taskRoot / std::to_string(i))) { ++i; } while (fs::exists(taskRoot / std::to_string(i))) {
++i;
}
auto attemptDir = taskRoot / std::to_string(i); auto attemptDir = taskRoot / std::to_string(i);
fs::create_directories(attemptDir); fs::create_directories(attemptDir);
@@ -87,8 +109,10 @@ namespace daggy {
// Metadata // Metadata
ofh.open(attemptDir / "metadata.json"); ofh.open(attemptDir / "metadata.json");
ofh << "{\n" ofh << "{\n"
<< R"("startTime": )" << std::quoted(timePointToString(attempt.startTime)) << ",\n" << R"("startTime": )"
<< R"("stopTime": )" << std::quoted(timePointToString(attempt.stopTime)) << ",\n" << std::quoted(timePointToString(attempt.startTime)) << ",\n"
<< R"("stopTime": )" << std::quoted(timePointToString(attempt.stopTime))
<< ",\n"
<< R"("rc": )" << attempt.rc << '\n' << R"("rc": )" << attempt.rc << '\n'
<< '}'; << '}';
@@ -108,19 +132,26 @@ namespace daggy {
ofh.close(); ofh.close();
} }
void FileSystemLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) { void FileSystemLogger::updateTaskState(DAGRunID dagRunID,
std::ofstream ofh(getRunRoot(dagRunID) / taskName / "states.csv", std::ios::binary | std::ios::app); const std::string &taskName,
ofh << std::quoted(timePointToString(Clock::now())) << ',' << state._to_string() << '\n'; RunState state)
{
std::ofstream ofh(getRunRoot(dagRunID) / taskName / "states.csv",
std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ','
<< state._to_string() << '\n';
ofh.flush(); ofh.flush();
ofh.close(); ofh.close();
} }
// Querying // Querying
std::vector<DAGRunSummary> FileSystemLogger::getDAGs(uint32_t stateMask) { std::vector<DAGRunSummary> FileSystemLogger::getDAGs(uint32_t stateMask)
{
return {}; return {};
} }
DAGRunRecord FileSystemLogger::getDAGRun(DAGRunID dagRunID) { DAGRunRecord FileSystemLogger::getDAGRun(DAGRunID dagRunID)
{
DAGRunRecord record; DAGRunRecord record;
auto runRoot = getRunRoot(dagRunID); auto runRoot = getRunRoot(dagRunID);
if (!fs::exists(runRoot)) { if (!fs::exists(runRoot)) {
@@ -150,15 +181,14 @@ namespace daggy {
std::getline(ss, time, ','); std::getline(ss, time, ',');
std::getline(ss, state); std::getline(ss, state);
record.dagStateChanges.emplace_back(DAGUpdateRecord{ record.dagStateChanges.emplace_back(
.time = stringToTimePoint(time), DAGUpdateRecord{.time = stringToTimePoint(time),
.newState = RunState::_from_string(state.c_str()) .newState = RunState::_from_string(state.c_str())});
});
} }
ifh.close(); ifh.close();
// Task states // Task states
for (const auto &[taskName, task]: record.tasks) { for (const auto &[taskName, task] : record.tasks) {
auto taskStateFile = runRoot / taskName / "states.csv"; auto taskStateFile = runRoot / taskName / "states.csv";
if (!fs::exists(taskStateFile)) { if (!fs::exists(taskStateFile)) {
record.taskRunStates.emplace(taskName, RunState::QUEUED); record.taskRunStates.emplace(taskName, RunState::QUEUED);
@@ -166,13 +196,17 @@ namespace daggy {
} }
ifh.open(taskStateFile); ifh.open(taskStateFile);
while (std::getline(ifh, line)) { continue; } while (std::getline(ifh, line)) {
continue;
}
std::stringstream ss{line}; std::stringstream ss{line};
while (std::getline(ss, token, ',')) { continue; } while (std::getline(ss, token, ',')) {
continue;
}
RunState taskState = RunState::_from_string(token.c_str()); RunState taskState = RunState::_from_string(token.c_str());
record.taskRunStates.emplace(taskName, taskState); record.taskRunStates.emplace(taskName, taskState);
ifh.close(); ifh.close();
} }
return record; return record;
} }
} } // namespace daggy

View File

@@ -1,107 +1,121 @@
#include <iterator>
#include <algorithm>
#include <enum.h> #include <enum.h>
#include <daggy/loggers/dag_run/OStreamLogger.hpp> #include <algorithm>
#include <daggy/Serialization.hpp> #include <daggy/Serialization.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <iterator>
namespace daggy { namespace daggy { namespace loggers { namespace dag_run {
namespace loggers { OStreamLogger::OStreamLogger(std::ostream &os)
namespace dag_run { : os_(os)
OStreamLogger::OStreamLogger(std::ostream &os) : os_(os) {} {
}
// Execution // Execution
DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskSet &tasks) { DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskSet &tasks)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
size_t runID = dagRuns_.size(); size_t runID = dagRuns_.size();
dagRuns_.push_back({ dagRuns_.push_back({.name = name, .tasks = tasks});
.name = name, for (const auto &[name, _] : tasks) {
.tasks = tasks
});
for (const auto &[name, _]: tasks) {
_updateTaskState(runID, name, RunState::QUEUED); _updateTaskState(runID, name, RunState::QUEUED);
} }
_updateDAGRunState(runID, RunState::QUEUED); _updateDAGRunState(runID, RunState::QUEUED);
os_ << "Starting new DAGRun named " << name << " with ID " << runID << " and " << tasks.size() os_ << "Starting new DAGRun named " << name << " with ID " << runID
<< " tasks" << std::endl; << " and " << tasks.size() << " tasks" << std::endl;
for (const auto &[name, task]: tasks) { for (const auto &[name, task] : tasks) {
os_ << "TASK (" << name << "): " << configToJSON(task.job); os_ << "TASK (" << name << "): " << configToJSON(task.job);
os_ << std::endl; os_ << std::endl;
} }
return runID; return runID;
} }
void OStreamLogger::addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) { void OStreamLogger::addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
auto &dagRun = dagRuns_[dagRunID]; auto &dagRun = dagRuns_[dagRunID];
dagRun.tasks[taskName] = task; dagRun.tasks[taskName] = task;
_updateTaskState(dagRunID, taskName, RunState::QUEUED); _updateTaskState(dagRunID, taskName, RunState::QUEUED);
} }
void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) { void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
auto &dagRun = dagRuns_[dagRunID]; auto &dagRun = dagRuns_[dagRunID];
dagRun.tasks[taskName] = task; dagRun.tasks[taskName] = task;
} }
void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state) { void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
_updateDAGRunState(dagRunID, state); _updateDAGRunState(dagRunID, state);
} }
void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state) { void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state)
os_ << "DAG State Change(" << dagRunID << "): " << state._to_string() << std::endl; {
os_ << "DAG State Change(" << dagRunID << "): " << state._to_string()
<< std::endl;
dagRuns_[dagRunID].dagStateChanges.push_back({Clock::now(), state}); dagRuns_[dagRunID].dagStateChanges.push_back({Clock::now(), state});
} }
void OStreamLogger::logTaskAttempt(DAGRunID dagRunID, const std::string &taskName, void OStreamLogger::logTaskAttempt(DAGRunID dagRunID,
const AttemptRecord &attempt) { const std::string &taskName,
const AttemptRecord &attempt)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
const std::string &msg = attempt.rc == 0 ? attempt.outputLog : attempt.errorLog; const std::string &msg =
os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC " << attempt.rc << ": " attempt.rc == 0 ? attempt.outputLog : attempt.errorLog;
<< msg << std::endl; os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC "
<< attempt.rc << ": " << msg << std::endl;
dagRuns_[dagRunID].taskAttempts[taskName].push_back(attempt); dagRuns_[dagRunID].taskAttempts[taskName].push_back(attempt);
} }
void OStreamLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) { void OStreamLogger::updateTaskState(DAGRunID dagRunID,
const std::string &taskName,
RunState state)
{
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
_updateTaskState(dagRunID, taskName, state); _updateTaskState(dagRunID, taskName, state);
} }
void OStreamLogger::_updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) { void OStreamLogger::_updateTaskState(DAGRunID dagRunID,
const std::string &taskName,
RunState state)
{
auto &dagRun = dagRuns_.at(dagRunID); auto &dagRun = dagRuns_.at(dagRunID);
dagRun.taskStateChanges.push_back({Clock::now(), taskName, state}); dagRun.taskStateChanges.push_back({Clock::now(), taskName, state});
auto it = dagRun.taskRunStates.find(taskName); auto it = dagRun.taskRunStates.find(taskName);
if (it == dagRun.taskRunStates.end()) { if (it == dagRun.taskRunStates.end()) {
dagRun.taskRunStates.emplace(taskName, state); dagRun.taskRunStates.emplace(taskName, state);
} else { }
else {
it->second = state; it->second = state;
} }
os_ << "Task State Change (" << dagRunID << '/' << taskName << "): " os_ << "Task State Change (" << dagRunID << '/' << taskName
<< state._to_string() << "): " << state._to_string() << std::endl;
<< std::endl;
} }
// Querying // Querying
std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask) { std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask)
{
std::vector<DAGRunSummary> summaries; std::vector<DAGRunSummary> summaries;
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
size_t i = 0; size_t i = 0;
for (const auto &run: dagRuns_) { for (const auto &run : dagRuns_) {
DAGRunSummary summary{ DAGRunSummary summary{
.runID = i, .runID = i,
.name = run.name, .name = run.name,
.runState = run.dagStateChanges.back().newState, .runState = run.dagStateChanges.back().newState,
.startTime = run.dagStateChanges.front().time, .startTime = run.dagStateChanges.front().time,
.lastUpdate = std::max<TimePoint>(run.taskStateChanges.back().time, .lastUpdate = std::max<TimePoint>(run.taskStateChanges.back().time,
run.dagStateChanges.back().time) run.dagStateChanges.back().time)};
};
for (const auto &[_, taskState]: run.taskRunStates) { for (const auto &[_, taskState] : run.taskRunStates) {
summary.taskStateCounts[taskState]++; summary.taskStateCounts[taskState]++;
} }
@@ -110,13 +124,12 @@ namespace daggy {
return summaries; return summaries;
} }
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID) { DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID)
{
if (dagRunID >= dagRuns_.size()) { if (dagRunID >= dagRuns_.size()) {
throw std::runtime_error("No such DAGRun ID"); throw std::runtime_error("No such DAGRun ID");
} }
std::lock_guard<std::mutex> lock(guard_); std::lock_guard<std::mutex> lock(guard_);
return dagRuns_[dagRunID]; return dagRuns_[dagRunID];
} }
} }}} // namespace daggy::loggers::dag_run
}
}

View File

@@ -1,9 +1,9 @@
#include <catch2/catch.hpp>
#include <iostream> #include <iostream>
#include "daggy/DAG.hpp" #include "daggy/DAG.hpp"
#include <catch2/catch.hpp> TEST_CASE("General tests", "[general]")
{
TEST_CASE("General tests", "[general]") {
REQUIRE(1 == 1); REQUIRE(1 == 1);
} }

View File

@@ -6,7 +6,8 @@
#include <catch2/catch.hpp> #include <catch2/catch.hpp>
TEST_CASE("Sanity tests", "[sanity]") { TEST_CASE("Sanity tests", "[sanity]")
{
REQUIRE(1 == 1); REQUIRE(1 == 1);
} }

View File

@@ -1,10 +1,10 @@
#include <catch2/catch.hpp>
#include <iostream> #include <iostream>
#include "daggy/DAG.hpp" #include "daggy/DAG.hpp"
#include <catch2/catch.hpp> TEST_CASE("dag_construction", "[dag]")
{
TEST_CASE("dag_construction", "[dag]") {
daggy::DAG<size_t, size_t> dag; daggy::DAG<size_t, size_t> dag;
REQUIRE(dag.size() == 0); REQUIRE(dag.size() == 0);
@@ -26,18 +26,22 @@ TEST_CASE("dag_construction", "[dag]") {
REQUIRE(!dag.isValid()); REQUIRE(!dag.isValid());
// Bounds checking // Bounds checking
SECTION("addEdge Bounds Checking") { SECTION("addEdge Bounds Checking")
{
REQUIRE_THROWS(dag.addEdge(20, 0)); REQUIRE_THROWS(dag.addEdge(20, 0));
REQUIRE_THROWS(dag.addEdge(0, 20)); REQUIRE_THROWS(dag.addEdge(0, 20));
} }
} }
TEST_CASE("dag_traversal", "[dag]") { TEST_CASE("dag_traversal", "[dag]")
{
daggy::DAG<size_t, size_t> dag; daggy::DAG<size_t, size_t> dag;
const int N_VERTICES = 10; const int N_VERTICES = 10;
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(i, i); } for (int i = 0; i < N_VERTICES; ++i) {
dag.addVertex(i, i);
}
/* /*
0 ---------------------\ 0 ---------------------\
@@ -46,23 +50,15 @@ TEST_CASE("dag_traversal", "[dag]") {
4 -------------------------------/ \-----> 9 4 -------------------------------/ \-----> 9
*/ */
std::vector<std::pair<int, int>> edges{ std::vector<std::pair<int, int>> edges{{0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3},
{0, 6}, {3, 5}, {4, 7}, {7, 8}, {7, 9}};
{1, 5},
{5, 6},
{6, 7},
{2, 3},
{3, 5},
{4, 7},
{7, 8},
{7, 9}
};
for (auto const[from, to]: edges) { for (auto const [from, to] : edges) {
dag.addEdge(from, to); dag.addEdge(from, to);
} }
SECTION("Basic Traversal") { SECTION("Basic Traversal")
{
dag.reset(); dag.reset();
std::vector<int> visitOrder(N_VERTICES); std::vector<int> visitOrder(N_VERTICES);
size_t i = 0; size_t i = 0;
@@ -74,15 +70,16 @@ TEST_CASE("dag_traversal", "[dag]") {
} }
// Ensure visit order is preserved // Ensure visit order is preserved
for (auto const[from, to]: edges) { for (auto const [from, to] : edges) {
REQUIRE(visitOrder[from] <= visitOrder[to]); REQUIRE(visitOrder[from] <= visitOrder[to]);
} }
} }
SECTION("Iteration") { SECTION("Iteration")
{
size_t nVisited = 0; size_t nVisited = 0;
dag.forEach([&](auto &k) { dag.forEach([&](auto &k) {
(void) k; (void)k;
++nVisited; ++nVisited;
}); });
REQUIRE(nVisited == dag.size()); REQUIRE(nVisited == dag.size());

View File

@@ -1,8 +1,7 @@
#include <iostream> #include <catch2/catch.hpp>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <iostream>
#include <catch2/catch.hpp>
#include "daggy/loggers/dag_run/FileSystemLogger.hpp" #include "daggy/loggers/dag_run/FileSystemLogger.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp" #include "daggy/loggers/dag_run/OStreamLogger.hpp"
@@ -13,19 +12,26 @@ using namespace daggy;
using namespace daggy::loggers::dag_run; using namespace daggy::loggers::dag_run;
const TaskSet SAMPLE_TASKS{ const TaskSet SAMPLE_TASKS{
{"work_a", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}}, .children{"c"}}}, {"work_a",
{"work_b", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}}, .children{"c"}}}, Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}},
{"work_c", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}} .children{"c"}}},
}; {"work_b",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}},
.children{"c"}}},
{"work_c",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskSet &tasks) { inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name,
const TaskSet &tasks)
{
auto runID = logger.startDAGRun(name, tasks); auto runID = logger.startDAGRun(name, tasks);
auto dagRun = logger.getDAGRun(runID); auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.tasks == tasks); REQUIRE(dagRun.tasks == tasks);
REQUIRE(dagRun.taskRunStates.size() == tasks.size()); REQUIRE(dagRun.taskRunStates.size() == tasks.size());
auto nonQueuedTask = std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(), auto nonQueuedTask =
std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
[](const auto &a) { return a.second != +RunState::QUEUED; }); [](const auto &a) { return a.second != +RunState::QUEUED; });
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end()); REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
@@ -54,12 +60,14 @@ TEST_CASE("Filesystem Logger", "[filesystem_logger]") {
} }
*/ */
TEST_CASE("ostream_logger", "[ostream_logger]") { TEST_CASE("ostream_logger", "[ostream_logger]")
//cleanup(); {
// cleanup();
std::stringstream ss; std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("DAGRun Starts") { SECTION("DAGRun Starts")
{
testDAGRunInit(logger, "init_test", SAMPLE_TASKS); testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
} }

View File

@@ -1,18 +1,20 @@
#include <iostream> #include <catch2/catch.hpp>
#include <filesystem> #include <filesystem>
#include <iostream>
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/Serialization.hpp" #include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp" #include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include <catch2/catch.hpp> TEST_CASE("forking_executor", "[forking_executor]")
{
TEST_CASE("forking_executor", "[forking_executor]") {
daggy::executors::task::ForkingTaskExecutor ex(10); daggy::executors::task::ForkingTaskExecutor ex(10);
SECTION("Simple Run") { SECTION("Simple Run")
daggy::Task task{.job{ {
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/echo", "abc", "123"}}}}; daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/echo", "abc", "123"}}}};
REQUIRE(ex.validateTaskParameters(task.job)); REQUIRE(ex.validateTaskParameters(task.job));
@@ -24,9 +26,11 @@ TEST_CASE("forking_executor", "[forking_executor]") {
REQUIRE(rec.errorLog.empty()); REQUIRE(rec.errorLog.empty());
} }
SECTION("Error Run") { SECTION("Error Run")
daggy::Task task{.job{ {
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}}; daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
auto recFuture = ex.execute("command", task); auto recFuture = ex.execute("command", task);
auto rec = recFuture.get(); auto rec = recFuture.get();
@@ -36,16 +40,19 @@ TEST_CASE("forking_executor", "[forking_executor]") {
REQUIRE(rec.outputLog.empty()); REQUIRE(rec.outputLog.empty());
} }
SECTION("Large Output") { SECTION("Large Output")
const std::vector<std::string> BIG_FILES{ {
"/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli" const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
}; "/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile: BIG_FILES) { for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile)) continue; if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{.job{ daggy::Task task{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/cat", bigFile}}}}; .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
auto recFuture = ex.execute("command", task); auto recFuture = ex.execute("command", task);
auto rec = recFuture.get(); auto rec = recFuture.get();
@@ -56,30 +63,40 @@ TEST_CASE("forking_executor", "[forking_executor]") {
} }
} }
SECTION("Parameter Expansion") { SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})"; std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON); auto tasks = daggy::tasksFromJSON(taskJSON);
auto result = daggy::expandTaskSet(tasks, ex, params); auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2); REQUIRE(result.size() == 2);
} }
SECTION("Build with expansion") { SECTION("Build with expansion")
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"}; {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})"; std::string testTasks =
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params); R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4); REQUIRE(tasks.size() == 4);
} }
SECTION("Build with expansion using parents instead of children") { SECTION("Build with expansion using parents instead of children")
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"}; {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})"; std::string testTasks =
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params); R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4); REQUIRE(tasks.size() == 4);
} }

View File

@@ -1,37 +1,35 @@
#include <iostream>
#include <filesystem>
#include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h>
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include <catch2/catch.hpp> #include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
namespace fs = std::filesystem; namespace fs = std::filesystem;
#ifdef DAGGY_ENABLE_SLURM #ifdef DAGGY_ENABLE_SLURM
TEST_CASE("slurm_execution", "[slurm_executor]") { TEST_CASE("slurm_execution", "[slurm_executor]")
{
daggy::executors::task::SlurmTaskExecutor ex; daggy::executors::task::SlurmTaskExecutor ex;
daggy::ConfigValues defaultJobValues{ daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
{"minCPUs", "1"},
{"minMemoryMB", "100"}, {"minMemoryMB", "100"},
{"minTmpDiskMB", "0"}, {"minTmpDiskMB", "0"},
{"priority", "1"}, {"priority", "1"},
{"timeLimitSeconds", "200"}, {"timeLimitSeconds", "200"},
{"userID", std::to_string(getuid())}, {"userID", std::to_string(getuid())},
{"workDir", fs::current_path().string()}, {"workDir", fs::current_path().string()},
{"tmpDir", fs::current_path().string()} {"tmpDir", fs::current_path().string()}};
};
SECTION("Simple Run") { SECTION("Simple Run")
{
daggy::Task task{.job{ daggy::Task task{.job{
{"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}} {"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}}};
}};
task.job.merge(defaultJobValues); task.job.merge(defaultJobValues);
@@ -45,9 +43,11 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
REQUIRE(rec.errorLog.empty()); REQUIRE(rec.errorLog.empty());
} }
SECTION("Error Run") { SECTION("Error Run")
daggy::Task task{.job{ {
{"command", daggy::executors::task::SlurmTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}}; daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
task.job.merge(defaultJobValues); task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task); auto recFuture = ex.execute("command", task);
@@ -58,16 +58,19 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
REQUIRE(rec.outputLog.empty()); REQUIRE(rec.outputLog.empty());
} }
SECTION("Large Output") { SECTION("Large Output")
const std::vector<std::string> BIG_FILES{ {
"/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli" const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
}; "/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile: BIG_FILES) { for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile)) continue; if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{.job{ daggy::Task task{
{"command", daggy::executors::task::SlurmTaskExecutor::Command{"/usr/bin/cat", bigFile}}}}; .job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
task.job.merge(defaultJobValues); task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task); auto recFuture = ex.execute("command", task);
@@ -80,30 +83,40 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
} }
} }
SECTION("Parameter Expansion") { SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})"; std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues); auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues);
auto result = daggy::expandTaskSet(tasks, ex, params); auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2); REQUIRE(result.size() == 2);
} }
SECTION("Build with expansion") { SECTION("Build with expansion")
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"}; {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})"; std::string testTasks =
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params); R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4); REQUIRE(tasks.size() == 4);
} }
SECTION("Build with expansion using parents instead of children") { SECTION("Build with expansion using parents instead of children")
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"}; {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})"; std::string testTasks =
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params); R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4); REQUIRE(tasks.size() == 4);
} }

View File

@@ -1,34 +1,47 @@
#include <iostream> #include <catch2/catch.hpp>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <iostream>
#include <catch2/catch.hpp>
#include "daggy/Serialization.hpp" #include "daggy/Serialization.hpp"
namespace fs = std::filesystem; namespace fs = std::filesystem;
TEST_CASE("parameter_deserialization", "[deserialize_parameters]") { TEST_CASE("parameter_deserialization", "[deserialize_parameters]")
SECTION("Basic Parse") { {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"}; SECTION("Basic Parse")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
REQUIRE(params.size() == 2); REQUIRE(params.size() == 2);
REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"])); REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
REQUIRE(std::holds_alternative<std::string>(params["SOURCE"])); REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
}SECTION("Invalid JSON") { }
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"}; SECTION("Invalid JSON")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
REQUIRE_THROWS(daggy::configFromJSON(testParams)); REQUIRE_THROWS(daggy::configFromJSON(testParams));
}SECTION("Non-string Keys") { }
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"}; SECTION("Non-string Keys")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams)); REQUIRE_THROWS(daggy::configFromJSON(testParams));
}SECTION("Non-array/Non-string values") { }
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"}; SECTION("Non-array/Non-string values")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams)); REQUIRE_THROWS(daggy::configFromJSON(testParams));
} }
} }
TEST_CASE("task_deserialization", "[deserialize_task]") { TEST_CASE("task_deserialization", "[deserialize_task]")
SECTION("Build with no expansion") { {
SECTION("Build with no expansion")
{
std::string testTasks = R"({ std::string testTasks = R"({
"A": { "A": {
"job": { "command": ["/bin/echo", "A"] }, "job": { "command": ["/bin/echo", "A"] },
@@ -46,7 +59,8 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
REQUIRE(tasks.size() == 3); REQUIRE(tasks.size() == 3);
} }
SECTION("Build with job defaults") { SECTION("Build with job defaults")
{
std::string testTasks = R"({ std::string testTasks = R"({
"A": { "A": {
"job": { "command": ["/bin/echo", "A"] }, "job": { "command": ["/bin/echo", "A"] },
@@ -59,8 +73,7 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
} }
} }
})"; })";
daggy::ConfigValues jobDefaults{{"runtime", "60"}, daggy::ConfigValues jobDefaults{{"runtime", "60"}, {"memory", "300M"}};
{"memory", "300M"}};
auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults); auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
REQUIRE(tasks.size() == 2); REQUIRE(tasks.size() == 2);
REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60"); REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
@@ -70,9 +83,12 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
} }
} }
TEST_CASE("task_serialization", "[serialize_tasks]") { TEST_CASE("task_serialization", "[serialize_tasks]")
SECTION("Build with no expansion") { {
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})"; SECTION("Build with no expansion")
{
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::tasksFromJSON(testTasks); auto tasks = daggy::tasksFromJSON(testTasks);
auto genJSON = daggy::tasksToJSON(tasks); auto genJSON = daggy::tasksToJSON(tasks);
@@ -80,7 +96,7 @@ TEST_CASE("task_serialization", "[serialize_tasks]") {
REQUIRE(regenTasks.size() == tasks.size()); REQUIRE(regenTasks.size() == tasks.size());
for (const auto &[name, task]: regenTasks) { for (const auto &[name, task] : regenTasks) {
const auto &other = tasks[name]; const auto &other = tasks[name];
REQUIRE(task == other); REQUIRE(task == other);
} }

View File

@@ -1,20 +1,19 @@
#include <iostream>
#include <filesystem>
#include <fstream>
#include <catch2/catch.hpp>
#include <pistache/client.h> #include <pistache/client.h>
#include <rapidjson/document.h> #include <rapidjson/document.h>
#include <daggy/Server.hpp> #include <catch2/catch.hpp>
#include <daggy/Serialization.hpp> #include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp> #include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp> #include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
namespace rj = rapidjson; namespace rj = rapidjson;
Pistache::Http::Response Pistache::Http::Response REQUEST(std::string url, std::string payload = "")
REQUEST(std::string url, std::string payload = "") { {
Pistache::Http::Experimental::Client client; Pistache::Http::Experimental::Client client;
client.init(); client.init();
Pistache::Http::Response response; Pistache::Http::Response response;
@@ -35,11 +34,11 @@ REQUEST(std::string url, std::string payload = "") {
error = true; error = true;
try { try {
std::rethrow_exception(ptr); std::rethrow_exception(ptr);
} catch (std::exception &e) { }
catch (std::exception &e) {
msg = e.what(); msg = e.what();
} }
} });
);
Pistache::Async::Barrier<Pistache::Http::Response> barrier(request); Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
barrier.wait_for(std::chrono::seconds(2)); barrier.wait_for(std::chrono::seconds(2));
@@ -50,14 +49,14 @@ REQUEST(std::string url, std::string payload = "") {
return response; return response;
} }
TEST_CASE("rest_endpoint", "[server_basic]") { TEST_CASE("rest_endpoint", "[server_basic]")
{
std::stringstream ss; std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10); daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0)); Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, const size_t nDAGRunners = 10, nWebThreads = 10;
nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners); daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads); server.init(nWebThreads);
@@ -66,17 +65,20 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
const std::string host = "localhost:"; const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort()); const std::string baseURL = host + std::to_string(server.getPort());
SECTION ("Ready Endpoint") { SECTION("Ready Endpoint")
{
auto response = REQUEST(baseURL + "/ready"); auto response = REQUEST(baseURL + "/ready");
REQUIRE(response.code() == Pistache::Http::Code::Ok); REQUIRE(response.code() == Pistache::Http::Code::Ok);
} }
SECTION ("Querying a non-existent dagrunid should fail ") { SECTION("Querying a non-existent dagrunid should fail ")
{
auto response = REQUEST(baseURL + "/v1/dagrun/100"); auto response = REQUEST(baseURL + "/v1/dagrun/100");
REQUIRE(response.code() != Pistache::Http::Code::Ok); REQUIRE(response.code() != Pistache::Http::Code::Ok);
} }
SECTION("Simple DAGRun Submission") { SECTION("Simple DAGRun Submission")
{
std::string dagRun = R"({ std::string dagRun = R"({
"name": "unit_server", "name": "unit_server",
"parameters": { "FILE": [ "A", "B" ] }, "parameters": { "FILE": [ "A", "B" ] },
@@ -88,7 +90,6 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
} }
})"; })";
// Submit, and get the runID // Submit, and get the runID
daggy::DAGRunID runID = 0; daggy::DAGRunID runID = 0;
{ {
@@ -145,26 +146,29 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
const auto &taskStates = doc["taskStates"].GetObject(); const auto &taskStates = doc["taskStates"].GetObject();
size_t nStates = 0; size_t nStates = 0;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd(); ++it) { for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
nStates++; nStates++;
} }
REQUIRE(nStates == 3); REQUIRE(nStates == 3);
complete = true; complete = true;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd(); ++it) { for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
std::string state = it->value.GetString(); std::string state = it->value.GetString();
if (state != "COMPLETED") { if (state != "COMPLETED") {
complete = false; complete = false;
break; break;
} }
} }
if (complete) break; if (complete)
break;
std::this_thread::sleep_for(std::chrono::seconds(1)); std::this_thread::sleep_for(std::chrono::seconds(1));
} }
REQUIRE(complete); REQUIRE(complete);
std::this_thread::sleep_for(std::chrono::seconds(2)); std::this_thread::sleep_for(std::chrono::seconds(2));
for (const auto &pth: std::vector<fs::path>{"dagrun_A", "dagrun_B"}) { for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
REQUIRE(fs::exists(pth)); REQUIRE(fs::exists(pth));
fs::remove(pth); fs::remove(pth);
} }

View File

@@ -1,19 +1,20 @@
#include <iostream> #include <catch2/catch.hpp>
#include <future> #include <future>
#include <iostream>
#include "daggy/ThreadPool.hpp" #include "daggy/ThreadPool.hpp"
#include <catch2/catch.hpp>
using namespace daggy; using namespace daggy;
TEST_CASE("threadpool", "[threadpool]") { TEST_CASE("threadpool", "[threadpool]")
{
std::atomic<uint32_t> cnt(0); std::atomic<uint32_t> cnt(0);
ThreadPool tp(10); ThreadPool tp(10);
std::vector<std::future<uint32_t>> rets; std::vector<std::future<uint32_t>> rets;
SECTION("Adding large tasks queues with return values") { SECTION("Adding large tasks queues with return values")
{
auto tq = std::make_shared<daggy::TaskQueue>(); auto tq = std::make_shared<daggy::TaskQueue>();
std::vector<std::future<uint32_t>> res; std::vector<std::future<uint32_t>> res;
for (size_t i = 0; i < 100; ++i) for (size_t i = 0; i < 100; ++i)
@@ -22,11 +23,13 @@ TEST_CASE("threadpool", "[threadpool]") {
return cnt.load(); return cnt.load();
}))); })));
tp.addTasks(tq); tp.addTasks(tq);
for (auto &r: res) r.get(); for (auto &r : res)
r.get();
REQUIRE(cnt == 100); REQUIRE(cnt == 100);
} }
SECTION("Slow runs") { SECTION("Slow runs")
{
std::vector<std::future<void>> res; std::vector<std::future<void>> res;
using namespace std::chrono_literals; using namespace std::chrono_literals;
for (size_t i = 0; i < 100; ++i) for (size_t i = 0; i < 100; ++i)
@@ -35,7 +38,8 @@ TEST_CASE("threadpool", "[threadpool]") {
cnt++; cnt++;
return; return;
})); }));
for (auto &r: res) r.get(); for (auto &r : res)
r.get();
REQUIRE(cnt == 100); REQUIRE(cnt == 100);
} }
} }

View File

@@ -1,41 +1,45 @@
#include <iostream> #include <algorithm>
#include <catch2/catch.hpp>
#include <chrono> #include <chrono>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <algorithm>
#include <iostream> #include <iostream>
#include <random> #include <random>
#include <catch2/catch.hpp>
#include "daggy/Utilities.hpp"
#include "daggy/Serialization.hpp" #include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp" #include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp" #include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp" #include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem; namespace fs = std::filesystem;
TEST_CASE("string_utilities", "[utilities_string]") { TEST_CASE("string_utilities", "[utilities_string]")
{
std::string test = "/this/is/{{A}}/test/{{A}}"; std::string test = "/this/is/{{A}}/test/{{A}}";
auto res = daggy::globalSub(test, "{{A}}", "hello"); auto res = daggy::globalSub(test, "{{A}}", "hello");
REQUIRE(res == "/this/is/hello/test/hello"); REQUIRE(res == "/this/is/hello/test/hello");
} }
TEST_CASE("string_expansion", "[utilities_parameter_expansion]") { TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
SECTION("Basic expansion") { {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; SECTION("Basic expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"}; std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}",
"{{TYPE}}"};
auto allCommands = daggy::interpolateValues(cmd, params); auto allCommands = daggy::interpolateValues(cmd, params);
REQUIRE(allCommands.size() == 6); REQUIRE(allCommands.size() == 6);
} }
SECTION("Skip over unused parameters") { SECTION("Skip over unused parameters")
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"}; std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
auto allCommands = daggy::interpolateValues(cmd, params); auto allCommands = daggy::interpolateValues(cmd, params);
@@ -44,7 +48,8 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
REQUIRE(allCommands.size() == 2); REQUIRE(allCommands.size() == 2);
} }
SECTION("Expand within a command part") { SECTION("Expand within a command part")
{
std::string testParams{ std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"}; R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
@@ -53,19 +58,19 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
// TYPE isn't used, so it's just |DATE| * |SOURCE| // TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(result.size() == 4); REQUIRE(result.size() == 4);
} }
} }
TEST_CASE("dag_runner_order", "[dagrun_order]") { TEST_CASE("dag_runner_order", "[dagrun_order]")
{
daggy::executors::task::NoopTaskExecutor ex; daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss; std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint startTime = daggy::Clock::now(); daggy::TimePoint startTime = daggy::Clock::now();
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"}; std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({ std::string taskJSON = R"({
@@ -90,39 +95,43 @@ TEST_CASE("dag_runner_order", "[dagrun_order]") {
auto rec = logger.getDAGRun(runID); auto rec = logger.getDAGRun(runID);
daggy::TimePoint stopTime = daggy::Clock::now(); daggy::TimePoint stopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes; minTimes.fill(startTime); std::array<daggy::TimePoint, 5> minTimes;
std::array<daggy::TimePoint, 5> maxTimes; maxTimes.fill(stopTime); minTimes.fill(startTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(stopTime);
for (const auto &[k, v] : rec.taskAttempts) { for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 65; size_t idx = k[0] - 65;
auto & startTime = minTimes[idx]; auto &startTime = minTimes[idx];
auto & stopTime = maxTimes[idx]; auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime); startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime); stopTime = std::min(stopTime, v.back().stopTime);
} }
for (size_t i = 0; i < 5; ++i) { for (size_t i = 0; i < 5; ++i) {
for (size_t j = i+1; j < 4; ++j) { for (size_t j = i + 1; j < 4; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]); REQUIRE(maxTimes[i] < minTimes[j]);
} }
} }
} }
TEST_CASE("dag_runner", "[utilities_dag_runner]") { TEST_CASE("dag_runner", "[utilities_dag_runner]")
{
daggy::executors::task::ForkingTaskExecutor ex(10); daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss; std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Simple execution") { SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string(); std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{ std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"}, {"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
{"B", prefix + "_B"}, std::string taskJSON =
{"C", prefix + "_C"}}; R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")" R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
+ files.at("A") + R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" files.at("B") +
+ files.at("B") + R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
+ files.at("C") + R"("]}}})"; files.at("C") + R"("]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex); auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks); auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks); auto runID = logger.startDAGRun("test_run", tasks);
@@ -137,18 +146,20 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
// Get the DAG Run Attempts // Get the DAG Run Attempts
auto record = logger.getDAGRun(runID); auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts]: record.taskAttempts) { for (const auto &[_, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1); REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0); REQUIRE(attempts.front().rc == 0);
} }
} }
SECTION("Recovery from Error") { SECTION("Recovery from Error")
{
auto cleanup = []() { auto cleanup = []() {
// Cleanup // Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"}; std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth: paths) { for (const auto &pth : paths) {
if (fs::exists(pth)) fs::remove_all(pth); if (fs::exists(pth))
fs::remove_all(pth);
} }
}; };
@@ -156,12 +167,12 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
std::string goodPrefix = "rec_error_"; std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_"; std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")" std::string taskJSON =
+ goodPrefix + R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
+ badPrefix + badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
+ badPrefix + R"(C"]}}})"; badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex); auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks); auto dag = daggy::buildDAGFromTasks(tasks);
@@ -181,17 +192,21 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
// Get the DAG Run Attempts // Get the DAG Run Attempts
auto record = logger.getDAGRun(runID); auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() == 2); // B errored and had to be retried REQUIRE(record.taskAttempts["B_0"].size() ==
REQUIRE(record.taskAttempts["C_0"].size() == 1); // C wasn't run because B errored 2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C wasn't run because B errored
cleanup(); cleanup();
} }
SECTION("Generator tasks") { SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams); auto params = daggy::configFromJSON(testParams);
std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})"; std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json"; fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn}; std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl; ofh << generatorOutput << std::endl;
@@ -199,7 +214,9 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
std::stringstream jsonTasks; std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string()) jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)" << R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })"; << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
@@ -221,15 +238,18 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
REQUIRE(record.tasks.size() == 4); REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4); REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts]: record.taskAttempts) { for (const auto &[taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1); REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0); REQUIRE(attempts.back().rc == 0);
} }
// Ensure that children were updated properly // Ensure that children were updated properly
REQUIRE(record.tasks["A_0"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"}); REQUIRE(record.tasks["A_0"].children ==
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"}); std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"}); REQUIRE(record.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C_0"].children.empty()); REQUIRE(record.tasks["C_0"].children.empty());
} }
} }

View File

@@ -1,16 +1,15 @@
#include <iostream>
#include <fstream>
#include <filesystem>
#include <argparse.hpp>
#include <pistache/client.h> #include <pistache/client.h>
#include <rapidjson/document.h> #include <rapidjson/document.h>
#include <argparse.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
namespace rj = rapidjson; namespace rj = rapidjson;
Pistache::Http::Response Pistache::Http::Response REQUEST(std::string url, std::string payload = "")
REQUEST(std::string url, std::string payload = "") { {
Pistache::Http::Experimental::Client client; Pistache::Http::Experimental::Client client;
client.init(); client.init();
Pistache::Http::Response response; Pistache::Http::Response response;
@@ -31,11 +30,11 @@ REQUEST(std::string url, std::string payload = "") {
error = true; error = true;
try { try {
std::rethrow_exception(ptr); std::rethrow_exception(ptr);
} catch (std::exception &e) { }
catch (std::exception &e) {
msg = e.what(); msg = e.what();
} }
} });
);
Pistache::Async::Barrier<Pistache::Http::Response> barrier(request); Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
barrier.wait_for(std::chrono::seconds(2)); barrier.wait_for(std::chrono::seconds(2));
@@ -46,7 +45,8 @@ REQUEST(std::string url, std::string payload = "") {
return response; return response;
} }
int main(int argc, char **argv) { int main(int argc, char **argv)
{
argparse::ArgumentParser args("Daggy Client"); argparse::ArgumentParser args("Daggy Client");
args.add_argument("-v", "--verbose") args.add_argument("-v", "--verbose")
@@ -55,10 +55,8 @@ int main(int argc, char **argv) {
args.add_argument("--url") args.add_argument("--url")
.help("base URL of server") .help("base URL of server")
.default_value("http://localhost:2503"); .default_value("http://localhost:2503");
args.add_argument("--sync") args.add_argument("--sync").default_value(false).implicit_value(true).help(
.default_value(false) "Poll for job to complete");
.implicit_value(true)
.help("Poll for job to complete");
args.add_argument("--action") args.add_argument("--action")
.help("Number of tasks to run concurrently") .help("Number of tasks to run concurrently")
.default_value(30) .default_value(30)
@@ -66,7 +64,8 @@ int main(int argc, char **argv) {
try { try {
args.parse_args(argc, argv); args.parse_args(argc, argv);
} catch (std::exception &e) { }
catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl; std::cout << "Error: " << e.what() << std::endl;
std::cout << args; std::cout << args;
exit(1); exit(1);

View File

@@ -1,13 +1,11 @@
#include <iostream>
#include <fstream>
#include <atomic>
#include <sys/stat.h>
#include <signal.h> #include <signal.h>
#include <sys/stat.h>
#include <argparse.hpp> #include <argparse.hpp>
#include <atomic>
#include <daggy/Server.hpp> #include <daggy/Server.hpp>
#include <fstream>
#include <iostream>
// Add executors here // Add executors here
#ifdef DAGGY_ENABLE_SLURM #ifdef DAGGY_ENABLE_SLURM
@@ -24,14 +22,15 @@
/* /*
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include <syslog.h> #include <syslog.h>
#include <unistd.h>
*/ */
static std::atomic<bool> running{true}; static std::atomic<bool> running{true};
void signalHandler(int signal) { void signalHandler(int signal)
{
switch (signal) { switch (signal) {
case SIGHUP: case SIGHUP:
break; break;
@@ -42,22 +41,27 @@ void signalHandler(int signal) {
} }
} }
void daemonize() { void daemonize()
{
pid_t pid; pid_t pid;
struct sigaction newSigAction; struct sigaction newSigAction;
sigset_t newSigSet; sigset_t newSigSet;
/* Check if parent process id is set */ /* Check if parent process id is set */
if (getppid() == 1) { return; } if (getppid() == 1) {
return;
}
/* Set signal mask - signals we want to block */ /* Set signal mask - signals we want to block */
sigemptyset(&newSigSet); sigemptyset(&newSigSet);
sigaddset(&newSigSet, SIGCHLD); /* ignore child - i.e. we don't need to wait for it */ sigaddset(&newSigSet,
SIGCHLD); /* ignore child - i.e. we don't need to wait for it */
sigaddset(&newSigSet, SIGTSTP); /* ignore Tty stop signals */ sigaddset(&newSigSet, SIGTSTP); /* ignore Tty stop signals */
sigaddset(&newSigSet, SIGTTOU); /* ignore Tty background writes */ sigaddset(&newSigSet, SIGTTOU); /* ignore Tty background writes */
sigaddset(&newSigSet, SIGTTIN); /* ignore Tty background reads */ sigaddset(&newSigSet, SIGTTIN); /* ignore Tty background reads */
sigprocmask(SIG_BLOCK, &newSigSet, NULL); /* Block the above specified signals */ sigprocmask(SIG_BLOCK, &newSigSet,
NULL); /* Block the above specified signals */
/* Set up a signal handler */ /* Set up a signal handler */
newSigAction.sa_handler = signalHandler; newSigAction.sa_handler = signalHandler;
@@ -71,8 +75,12 @@ void daemonize() {
// Fork once // Fork once
pid = fork(); pid = fork();
if (pid < 0) { exit(EXIT_FAILURE); } if (pid < 0) {
if (pid > 0) { exit(EXIT_SUCCESS); } exit(EXIT_FAILURE);
}
if (pid > 0) {
exit(EXIT_SUCCESS);
}
/* On success: The child process becomes session leader */ /* On success: The child process becomes session leader */
if (setsid() < 0) { if (setsid() < 0) {
@@ -99,18 +107,19 @@ void daemonize() {
(void)rc; (void)rc;
/* Close all open file descriptors */ /* Close all open file descriptors */
for (auto x = sysconf(_SC_OPEN_MAX); x >= 0; x--) { close(x); } for (auto x = sysconf(_SC_OPEN_MAX); x >= 0; x--) {
close(x);
}
} }
int main(int argc, char **argv) { int main(int argc, char **argv)
{
argparse::ArgumentParser args("Daggy"); argparse::ArgumentParser args("Daggy");
args.add_argument("-v", "--verbose") args.add_argument("-v", "--verbose")
.default_value(false) .default_value(false)
.implicit_value(true); .implicit_value(true);
args.add_argument("-d", "--daemon") args.add_argument("-d", "--daemon").default_value(false).implicit_value(true);
.default_value(false)
.implicit_value(true);
args.add_argument("--ip") args.add_argument("--ip")
.help("IP address to listen to") .help("IP address to listen to")
.default_value(std::string{"127.0.0.1"}); .default_value(std::string{"127.0.0.1"});
@@ -136,7 +145,8 @@ int main(int argc, char **argv) {
try { try {
args.parse_args(argc, argv); args.parse_args(argc, argv);
} catch (std::exception &e) { }
catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl; std::cout << "Error: " << e.what() << std::endl;
std::cout << args; std::cout << args;
exit(1); exit(1);
@@ -156,7 +166,8 @@ int main(int argc, char **argv) {
std::cout << "Unable to daemonize if logging to stdout" << std::endl; std::cout << "Unable to daemonize if logging to stdout" << std::endl;
exit(1); exit(1);
} }
} else { }
else {
fs::path logFn{logFileName}; fs::path logFn{logFileName};
if (!logFn.is_absolute()) { if (!logFn.is_absolute()) {
logFileName = (fs::current_path() / logFileName).string(); logFileName = (fs::current_path() / logFileName).string();
@@ -164,12 +175,14 @@ int main(int argc, char **argv) {
} }
if (verbose) { if (verbose) {
std::cout << "Server running at http://" << listenIP << ':' << listenPort << std::endl std::cout << "Server running at http://" << listenIP << ':' << listenPort
<< std::endl
<< "Max DAG Processing: " << dagThreads << std::endl << "Max DAG Processing: " << dagThreads << std::endl
<< "Max Task Execution: " << executorThreads << std::endl << "Max Task Execution: " << executorThreads << std::endl
<< "Max Web Clients: " << webThreads << std::endl << "Max Web Clients: " << webThreads << std::endl
<< "Logging to: " << logFileName << std::endl << "Logging to: " << logFileName << std::endl
<< std::endl << "Ctrl-C to exit" << std::endl; << std::endl
<< "Ctrl-C to exit" << std::endl;
} }
if (asDaemon) { if (asDaemon) {
@@ -179,8 +192,10 @@ int main(int argc, char **argv) {
std::ofstream logFH; std::ofstream logFH;
std::unique_ptr<daggy::loggers::dag_run::DAGRunLogger> logger; std::unique_ptr<daggy::loggers::dag_run::DAGRunLogger> logger;
if (logFileName == "-") { if (logFileName == "-") {
logger = std::make_unique<daggy::loggers::dag_run::OStreamLogger>(std::cout); logger =
} else { std::make_unique<daggy::loggers::dag_run::OStreamLogger>(std::cout);
}
else {
logFH.open(logFileName, std::ios::app); logFH.open(logFileName, std::ios::app);
logger = std::make_unique<daggy::loggers::dag_run::OStreamLogger>(logFH); logger = std::make_unique<daggy::loggers::dag_run::OStreamLogger>(logFH);
} }
@@ -196,7 +211,6 @@ int main(int argc, char **argv) {
server.init(webThreads); server.init(webThreads);
server.start(); server.start();
running = true; running = true;
while (running) { while (running) {
std::this_thread::sleep_for(std::chrono::seconds(30)); std::this_thread::sleep_for(std::chrono::seconds(30));