* Formatting code with clang-tidy

* Roughing in more metastore work
This commit is contained in:
Ian Roddis
2021-07-22 12:57:51 -03:00
parent 987718334f
commit 0349a5109b
16 changed files with 561 additions and 599 deletions

View File

@@ -0,0 +1,17 @@
#pragma once
#include <chrono>
#include <string>
namespace daggy {
    /// Clock used for the timestamps stored in an AttemptRecord.
    using Clock = std::chrono::system_clock;

    /// Result of one attempt at running a task's command.
    ///
    /// Every member has a well-defined value after default construction, so a
    /// record declared as `AttemptRecord rec;` can be read safely before the
    /// executor has filled it in.
    struct AttemptRecord {
        std::chrono::time_point<Clock> startTime; // When the attempt began
        std::chrono::time_point<Clock> stopTime;  // When the attempt finished
        // RC from the task. Explicitly zero-initialised: a plain `int` member is
        // otherwise indeterminate after `AttemptRecord rec;`. Zero matches what
        // value-initialisation (`AttemptRecord{}`) already produced.
        int rc{0};
        std::string metaLog; // Logs from the executor
        std::string output;  // stdout from command
        std::string error;   // stderr from command
    };
}

View File

@@ -34,24 +34,32 @@ namespace daggy {
public: public:
// Vertices // Vertices
size_t addVertex(); size_t addVertex();
const std::vector<Vertex> &getVertices(); const std::vector<Vertex> &getVertices();
// Edges // Edges
void addEdge(const size_t src, const size_t dst); void addEdge(const size_t src, const size_t dst);
void dropEdge(const size_t src, const size_t dst); void dropEdge(const size_t src, const size_t dst);
bool hasPath(const size_t from, const size_t to) const; bool hasPath(const size_t from, const size_t to) const;
const std::vector<Edge> &getEdges(); const std::vector<Edge> &getEdges();
// Attributes // Attributes
size_t size() const; size_t size() const;
bool empty() const; bool empty() const;
// Traversal // Traversal
void reset(); void reset();
VertexState getVertexState(const size_t id) const; VertexState getVertexState(const size_t id) const;
bool allVisited() const; bool allVisited() const;
std::optional<const size_t> visitNext(); std::optional<const size_t> visitNext();
void completeVisit(const size_t id); void completeVisit(const size_t id);
private: private:

View File

@@ -1,70 +0,0 @@
/// Number of vertices currently stored in the graph.
size_t DAG::size() const {
    return vertices_.size();
}
/// True when the graph holds no vertices at all.
bool DAG::empty() const {
    return vertices_.empty();
}
/// Appends a new vertex (state UNVISITED, depCount 0) and returns its id.
///
/// The id is the vertex's index in `vertices_`, which is the value every
/// other method (addEdge, hasPath, completeVisit, ...) uses to look it up.
size_t DAG::addVertex() {
    vertices_.push_back(Vertex{.state = VertexState::UNVISITED, .depCount = 0});
    // size() after the push is one past the new element; the vertex that was
    // just added lives at size() - 1.
    return vertices_.size() - 1;
}
/// Removes the edge `from -> to` from the graph.
void DAG::dropEdge(const size_t from, const size_t to) {
    auto &source = vertices_[from];
    // The node handle returned by extract() is discarded on purpose; only the
    // removal from `children` matters here.
    source.children.extract(to);
}
/// Adds the directed edge `from -> to`.
///
/// @throws std::out_of_range  if either id does not name an existing vertex.
/// @throws std::runtime_error if the edge would introduce a cycle, including
///                            a self-loop (`from == to`).
void DAG::addEdge(const size_t from, const size_t to) {
    // Validate before indexing: operator[] on an out-of-range id is undefined.
    if (from >= vertices_.size() || to >= vertices_.size())
        throw std::out_of_range("Edge endpoint is not a valid vertex id");
    // hasPath(to, from) only follows edges that already exist, so it cannot
    // detect the edge being added pointing back at its own source; reject a
    // self-loop explicitly.
    if (from == to || hasPath(to, from))
        throw std::runtime_error("Adding edge would result in a cycle");
    vertices_[from].children.insert(to);
}
/// Returns true when `to` can be reached from `from` by following one or
/// more edges.
///
/// A vertex is not considered reachable from itself unless some edge leads
/// back to it. An out-of-range `from` has no outgoing edges and yields false.
///
/// Implemented as an iterative depth-first search that expands each vertex at
/// most once, so vertices shared by several paths are not re-walked.
bool DAG::hasPath(const size_t from, const size_t to) const {
    if (from >= vertices_.size()) return false;

    std::vector<char> seen(vertices_.size(), 0); // 1 once a vertex is queued
    std::vector<size_t> pending;                 // vertices still to expand
    pending.push_back(from);
    seen[from] = 1;

    while (!pending.empty()) {
        const size_t current = pending.back();
        pending.pop_back();
        for (const auto &child : vertices_[current].children) {
            if (child == to) return true;
            if (!seen[child]) {
                seen[child] = 1;
                pending.push_back(child);
            }
        }
    }
    return false;
}
/// Returns the graph to its pre-traversal state: every vertex UNVISITED and
/// every depCount equal to the number of edges pointing at that vertex.
void DAG::reset() {
    // First pass: wipe traversal state and zero the dependency counters.
    for (size_t i = 0; i < vertices_.size(); ++i) {
        vertices_[i].state = VertexState::UNVISITED;
        vertices_[i].depCount = 0;
    }
    // Second pass: each outgoing edge contributes one dependency to its target.
    // This must run after all counters are zeroed, hence the separate loop.
    for (auto &parent : vertices_) {
        for (auto childId : parent.children) {
            ++vertices_[childId].depCount;
        }
    }
}
/// True when every vertex is in the VISITED state (trivially true for an
/// empty graph).
bool DAG::allVisited() const {
    for (size_t i = 0; i < vertices_.size(); ++i) {
        const bool done = (vertices_[i].state == VertexState::VISITED);
        if (!done) return false;
    }
    return true;
}
/// Picks the lowest-indexed vertex that is UNVISITED and has depCount == 0,
/// marks it VISITING and returns its id. Returns an empty optional when no
/// vertex currently qualifies.
std::optional<const size_t > DAG::visitNext() {
    size_t idx = 0;
    for (auto &candidate : vertices_) {
        const bool ready = candidate.state == VertexState::UNVISITED
                           && candidate.depCount == 0;
        if (ready) {
            candidate.state = VertexState::VISITING;
            return idx;
        }
        ++idx;
    }
    return std::nullopt;
}
/// Marks vertex `id` as VISITED and decrements the depCount of each of its
/// children (visitNext only returns vertices whose depCount is zero).
void DAG::completeVisit(const size_t id) {
    auto &finished = vertices_[id];
    finished.state = VertexState::VISITED;
    for (auto childId : finished.children) {
        auto &child = vertices_[childId];
        --child.depCount;
    }
}

View File

@@ -0,0 +1,21 @@
#pragma once
#include <string>
#include <unordered_map>
#include <variant>
#include <vector> // std::vector is used directly below; do not rely on DAG.hpp pulling it in
#include "DAG.hpp"
#include "Task.hpp"
#include "AttemptRecord.hpp"
namespace daggy {
    /// A run parameter: either a single string or a list of strings.
    using ParameterValue = std::variant<std::string, std::vector<std::string>>;

    /// All attempts made for one task, as a sequence of AttemptRecords.
    using TaskRun = std::vector<AttemptRecord>;

    /// Everything belonging to one run of a DAG.
    struct DAGRun {
        std::vector<Task> tasks;                                    // Tasks that make up the run
        std::unordered_map<std::string, ParameterValue> parameters; // Named parameters for the run
        DAG dag;                                                    // Dependency graph / traversal state
        std::vector<TaskRun> taskRuns;                              // Attempt history; the scheduler sizes this to tasks.size()
    };
}

View File

@@ -7,7 +7,7 @@
#include <vector> #include <vector>
#include "Task.hpp" #include "Task.hpp"
#include "AttemptRecord.hpp"
/* /*
Executors run Tasks, returning a future with the results. Executors run Tasks, returning a future with the results.
@@ -15,20 +15,10 @@
*/ */
namespace daggy { namespace daggy {
using Clock = std::chrono::system_clock;
struct AttemptRecord {
std::chrono::time_point<Clock> startTime;
std::chrono::time_point<Clock> stopTime;
int rc; // RC from the task
std::string metaLog; // Logs from the executor
std::string output; // stdout from command
std::string error; // stderr from command
};
class Executor { class Executor {
public: public:
Executor() = default; Executor() = default;
virtual const std::string getName() const = 0; virtual const std::string getName() const = 0;
// This will block if the executor is full // This will block if the executor is full

View File

@@ -2,6 +2,8 @@
#include <string> #include <string>
#include "DAGRun.hpp"
/* /*
MetaStore represents the interface to store all the state information MetaStore represents the interface to store all the state information
for daggy to run. Abstracted in case other back-end solutions need to for daggy to run. Abstracted in case other back-end solutions need to
@@ -9,17 +11,31 @@
*/ */
namespace daggy { namespace daggy {
using DAGDefID = int16_t; // future proofing using DAGDefID = int16_t;
using DAGRunID = size_t;
// This struct will contain transitions for
struct DAGRunEvent { };
class MetaStore { class MetaStore {
// Basic storage + retrieval of DAG Definitions // Basic storage + retrieval of DAG Definitions
virtual void storeDAGDefinition(std::string name, std::string definition) = 0; virtual DAGDefID storeDAGDefinition(std::string name, std::string definition) = 0;
virtual DAGDefID getCurrentDAGVersion(std::string name) = 0; virtual DAGDefID getCurrentDAGVersion(std::string name) = 0;
virtual std::string getDAGDefinition(std::string name, DAGDefID version = -1) = 0; virtual std::string getDAGDefinition(std::string name, DAGDefID version = -1) = 0;
// DAG Run State // DAG Run State
/*
* startDAGRun // DAG starts up, returns a DAGID for future updates
* updateDAGRun // DAG State transitions
* updateTaskState // Task state updates
*/
virtual DAGRunID startDAGRun(std::string dagName, DAGDefID version, DAGRun dagRun
) = 0;
virtual void updateTask(DAGRunID rid, std::string taskName, VertexState state) = 0;
virtual void updateDAGRun(DAGRunID rid, DAGState state) = 0;
// Retrievals
}; };
} }

View File

@@ -7,14 +7,10 @@
#include "DAG.hpp" #include "DAG.hpp"
#include "Executor.hpp" #include "Executor.hpp"
#include "DAGRun.hpp"
#include "ThreadPool.hpp" #include "ThreadPool.hpp"
namespace daggy { namespace daggy {
using ParameterValue = std::variant<std::string, std::vector<std::string>>;
using TaskRun = std::vector<AttemptRecord>;
class Scheduler {
public:
enum class DAGState : uint32_t { enum class DAGState : uint32_t {
UNKNOWN = 0, UNKNOWN = 0,
QUEUED, QUEUED,
@@ -23,25 +19,21 @@ namespace daggy {
COMPLETE COMPLETE
}; };
class Scheduler {
public:
public: public:
Scheduler( Scheduler(
Executor & executor Executor &executor, size_t executorThreads = 30, size_t schedulerThreads = 10);
, size_t executorThreads = 30
, size_t schedulerThreads = 10);
~Scheduler(); ~Scheduler();
// returns DagRun ID // returns DagRun ID
std::future<void> std::future<void>
scheduleDAG(std::string runName scheduleDAG(std::string runName, std::vector<Task> tasks,
, std::vector<Task> tasks std::unordered_map<std::string, ParameterValue> parameters,
, std::unordered_map<std::string, ParameterValue> parameters DAG dag = {} // Allows for loading of an existing DAG
, DAG dag = {} // Allows for loading of an existing DAG
); );
// get the current status of a DAG
DAGState dagRunStatus(std::string runName);
// get the current DAG // get the current DAG
DAG dagRunState(); DAG dagRunState();
@@ -49,16 +41,8 @@ namespace daggy {
void drain(); void drain();
private: private:
struct DAGRun {
std::vector<Task> tasks;
std::unordered_map<std::string, ParameterValue> parameters;
DAG dag;
std::vector<TaskRun> taskRuns;
std::mutex taskGuard_;
};
void runDAG(const std::string &name, DAGRun &dagRun); void runDAG(const std::string &name, DAGRun &dagRun);
std::vector<AttemptRecord> runTask(const Task &task); std::vector<AttemptRecord> runTask(const Task &task);
std::unordered_map<std::string, DAGRun> runs_; std::unordered_map<std::string, DAGRun> runs_;

View File

@@ -10,9 +10,7 @@ namespace daggy {
class Server { class Server {
public: public:
Server(Pistache::Address addr) Server(Pistache::Address addr)
: endpoint_(addr) : endpoint_(addr), desc_("Daggy API", "0.1") {}
, desc_("Daggy API", "0.1")
{}
void init(int threads = 1); void init(int threads = 1);
@@ -26,8 +24,11 @@ namespace daggy {
// //
void listDAGs(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void listDAGs(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void upsertDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void upsertDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void deleteDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void deleteDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void getDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response); void getDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
// //

View File

@@ -66,10 +66,7 @@ namespace daggy {
public: public:
explicit ThreadPool(size_t nWorkers) explicit ThreadPool(size_t nWorkers)
: :
tqit_(taskQueues_.begin()) tqit_(taskQueues_.begin()), stop_(false), drain_(false) {
, stop_(false)
, drain_(false)
{
resize(nWorkers); resize(nWorkers);
} }

View File

@@ -8,6 +8,7 @@ namespace daggy {
class ForkingExecutor : public Executor { class ForkingExecutor : public Executor {
public: public:
ForkingExecutor() = default; ForkingExecutor() = default;
const std::string getName() const override { return "ForkingExecutor"; } const std::string getName() const override { return "ForkingExecutor"; }
AttemptRecord runCommand(std::vector<std::string> cmd) override; AttemptRecord runCommand(std::vector<std::string> cmd) override;

View File

@@ -3,6 +3,7 @@
namespace daggy { namespace daggy {
size_t DAG::size() const { return vertices_.size(); } size_t DAG::size() const { return vertices_.size(); }
bool DAG::empty() const { return vertices_.empty(); } bool DAG::empty() const { return vertices_.empty(); }
size_t DAG::addVertex() { size_t DAG::addVertex() {

View File

@@ -3,13 +3,8 @@
using namespace std::chrono_literals; using namespace std::chrono_literals;
namespace daggy { namespace daggy {
Scheduler::Scheduler(Executor & executor Scheduler::Scheduler(Executor &executor, size_t executorThreads, size_t schedulerThreads)
, size_t executorThreads : executor_(executor), schedulers_(schedulerThreads), executors_(executorThreads) {}
, size_t schedulerThreads)
: executor_(executor)
, schedulers_(schedulerThreads)
, executors_(executorThreads)
{ }
Scheduler::~Scheduler() { Scheduler::~Scheduler() {
@@ -18,25 +13,22 @@ namespace daggy {
} }
std::future<void> std::future<void>
Scheduler::scheduleDAG(std::string runName Scheduler::scheduleDAG(std::string runName, std::vector<Task> tasks,
, std::vector<Task> tasks std::unordered_map<std::string, ParameterValue> parameters, DAG dag
, std::unordered_map<std::string, ParameterValue> parameters ) {
, DAG dag // Initialize the dag if one wasn't provided
)
{
// Initialize the dag
if (dag.empty()) { if (dag.empty()) {
std::unordered_map<std::string, size_t> tids; std::unordered_map<std::string, size_t> taskIDs;
// Add all the vertices // Add all the vertices
for (size_t i = 0; i < tasks.size(); ++i) { for (const auto &task : tasks) {
tids[tasks[i].name] = dag.addVertex(); taskIDs[task.name] = dag.addVertex();
} }
// Add edges // Add edges
for (size_t i = 0; i < tasks.size(); ++i) { for (size_t i = 0; i < tasks.size(); ++i) {
for (const auto &c : tasks[i].children) { for (const auto &c : tasks[i].children) {
dag.addEdge(i, tids[c]); dag.addEdge(i, taskIDs[c]);
} }
} }
dag.reset(); dag.reset();
@@ -47,7 +39,7 @@ namespace daggy {
auto &dr = runs_[runName]; auto &dr = runs_[runName];
dr.tasks = tasks; dr.tasks = tasks;
dr.parameters = parameters; dr.parameters = std::move(parameters);
dr.dag = dag; dr.dag = dag;
dr.taskRuns = std::vector<TaskRun>{tasks.size()}; dr.taskRuns = std::vector<TaskRun>{tasks.size()};
@@ -55,15 +47,14 @@ namespace daggy {
return std::move(schedulers_.addTask([&]() { runDAG(runName, dr); })); return std::move(schedulers_.addTask([&]() { runDAG(runName, dr); }));
} }
void Scheduler::runDAG(const std::string & name, DAGRun & run) void Scheduler::runDAG(const std::string &name, DAGRun &run) {
{ struct TaskState {
struct Task {
size_t tid; size_t tid;
std::future<std::vector<AttemptRecord>> fut; std::future<std::vector<AttemptRecord>> fut;
bool complete; bool complete;
}; };
std::vector<Task> tasks; std::vector<TaskState> tasks;
while (!run.dag.allVisited()) { while (!run.dag.allVisited()) {
@@ -80,13 +71,13 @@ namespace daggy {
} }
} }
// Get the next dag to run // Add all remaining tasks in a task queue to avoid dominating the thread pool
auto tq = std::make_shared<TaskQueue>();
auto t = run.dag.visitNext(); auto t = run.dag.visitNext();
while (t.has_value()) { while (t.has_value()) {
// Schedule the task to run // Schedule the task to run
Task tsk{ .tid = t.value() TaskState tsk{.tid = t.value(), .fut = tq->addTask(
, .fut = executors_.addTask([&](){return runTask(run.tasks[t.value()]);}) [&]() { return runTask(run.tasks[t.value()]); }), .complete = false
, .complete = false
}; };
tasks.push_back(std::move(tsk)); tasks.push_back(std::move(tsk));
@@ -95,6 +86,9 @@ namespace daggy {
if (not nt.has_value()) break; if (not nt.has_value()) break;
t.emplace(nt.value()); t.emplace(nt.value());
} }
if (! tq->empty()) {
executors_.addTasks(tq);
}
std::this_thread::sleep_for(250ms); std::this_thread::sleep_for(250ms);
} }

View File

@@ -5,8 +5,7 @@ using namespace Pistache;
namespace daggy { namespace daggy {
void Server::init(int threads) { void Server::init(int threads) {
auto opts = Http::Endpoint::options() auto opts = Http::Endpoint::options()
.threads(threads) .threads(threads);
;
endpoint_.init(opts); endpoint_.init(opts);
createDescription(); createDescription();
} }
@@ -21,11 +20,11 @@ namespace daggy {
void Server::createDescription() { void Server::createDescription() {
desc_ desc_
.info() .info()
.license("Apache", "http://www.apache.org/licenses/LICENSE-2.0") .license("Apache", "http://www.apache.org/licenses/LICENSE-2.0");
;
auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error, "An error occured with the backend"); auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error,
"An error occured with the backend");
desc_ desc_
.schemes(Rest::Scheme::Http) .schemes(Rest::Scheme::Http)

View File

@@ -34,8 +34,7 @@ std::string slurp(int fd) {
} }
daggy::AttemptRecord daggy::AttemptRecord
ForkingExecutor::runCommand(std::vector<std::string> cmd) ForkingExecutor::runCommand(std::vector<std::string> cmd) {
{
AttemptRecord rec; AttemptRecord rec;
rec.startTime = Clock::now(); rec.startTime = Clock::now();
@@ -48,8 +47,10 @@ daggy::AttemptRecord
argv.push_back(nullptr); argv.push_back(nullptr);
// Create the pipe // Create the pipe
int stdoutPipe[2]; pipe2(stdoutPipe, O_DIRECT); int stdoutPipe[2];
int stderrPipe[2]; pipe2(stderrPipe, O_DIRECT); pipe2(stdoutPipe, O_DIRECT);
int stderrPipe[2];
pipe2(stderrPipe, O_DIRECT);
pid_t child = fork(); pid_t child = fork();
if (child < 0) { if (child < 0) {

View File

@@ -17,7 +17,9 @@ TEST_CASE("Basic Scheduler Execution", "[scheduler]") {
}; };
SECTION("Simple Run") { SECTION("Simple Run") {
auto fut = sched.scheduleDAG("Simple", tasks, {}); auto fut_a = sched.scheduleDAG("Simple 1", tasks, {});
fut.get(); auto fut_b = sched.scheduleDAG("Simple 2", tasks, {});
fut_a.get();
fut_b.get();
} }
} }