* Formatting code with clang-tidy

* Roughing in more metastore work
This commit is contained in:
Ian Roddis
2021-07-22 12:57:51 -03:00
parent 987718334f
commit 0349a5109b
16 changed files with 561 additions and 599 deletions

View File

@@ -0,0 +1,17 @@
#pragma once
#include <chrono>
#include <string>
namespace daggy {
    // Clock used for the attempt timestamps below.
    using Clock = std::chrono::system_clock;

    // Result of one attempt at running a task's command.
    //
    // Every member has a well-defined value after default construction:
    // the time points default-construct to the clock's epoch, the strings
    // are empty, and rc is explicitly zeroed. Without the initializer on rc,
    // `AttemptRecord rec;` would leave it indeterminate until assigned.
    // The struct remains an aggregate, so brace/designated initialization
    // continues to work.
    struct AttemptRecord {
        std::chrono::time_point<Clock> startTime;
        std::chrono::time_point<Clock> stopTime;
        int rc{0};            // RC from the task (0 until an executor sets it)
        std::string metaLog;  // Logs from the executor
        std::string output;   // stdout from command
        std::string error;    // stderr from command
    };
}

View File

@@ -28,30 +28,38 @@ namespace daggy {
std::unordered_set<size_t> children;
};
using Edge = std::pair<size_t,size_t>;
using Edge = std::pair<size_t, size_t>;
class DAG {
public:
// Vertices
size_t addVertex();
const std::vector<Vertex> & getVertices();
const std::vector<Vertex> &getVertices();
// Edges
void addEdge(const size_t src, const size_t dst);
void dropEdge(const size_t src, const size_t dst);
bool hasPath(const size_t from, const size_t to) const;
const std::vector<Edge> & getEdges();
const std::vector<Edge> &getEdges();
// Attributes
size_t size() const;
bool empty() const;
// Traversal
void reset();
VertexState getVertexState(const size_t id) const;
bool allVisited() const;
std::optional<const size_t> visitNext();
void completeVisit(const size_t id);
private:

View File

@@ -1,70 +0,0 @@
// Number of vertices currently stored in the graph.
size_t DAG::size() const {
    const size_t vertexCount = vertices_.size();
    return vertexCount;
}
// True when the graph holds no vertices.
bool DAG::empty() const {
    return vertices_.size() == 0;
}
// Appends a new UNVISITED vertex with a zero dependency count and returns
// its index in vertices_.
//
// The index of the new vertex is returned (not the new vertex count), so the
// value can be handed directly to addEdge(), hasPath() and completeVisit(),
// all of which use their arguments to index vertices_. Returning the
// post-insert size would be off by one and out of range for the last vertex.
size_t DAG::addVertex() {
    const size_t id = vertices_.size();  // index the new vertex will occupy
    vertices_.push_back(Vertex{.state = VertexState::UNVISITED, .depCount = 0});
    return id;
}
// Removes the edge from -> to, if present, by dropping `to` from the child
// set of `from`. Does nothing when the edge does not exist.
void DAG::dropEdge(const size_t from, const size_t to) {
    auto &outgoing = vertices_[from].children;
    outgoing.erase(to);
}
// Adds the directed edge from -> to.
//
// Throws std::runtime_error if the edge would introduce a cycle. That covers
// two cases: `to` already reaches `from`, or the edge is a self-loop
// (from == to). The self-loop case has to be tested explicitly because
// hasPath() only follows existing child links and therefore cannot see an
// edge that has not been inserted yet.
void DAG::addEdge(const size_t from, const size_t to) {
    if (from == to || hasPath(to, from))
        throw std::runtime_error("Adding edge would result in a cycle");
    vertices_[from].children.insert(to);
}
// Returns true when `to` can be reached from `from` by following one or more
// child links (depth-first, recursive). A vertex is not considered to reach
// itself unless an actual path leads back to it.
bool DAG::hasPath(const size_t from, const size_t to) const {
    for (const auto &child : vertices_[from].children) {
        // Either a direct edge, or a path through this child.
        if (child == to || hasPath(child, to)) return true;
    }
    return false;
}
// Puts the graph back into its pre-traversal condition: every vertex is
// UNVISITED and depCount holds the number of incoming edges.
void DAG::reset() {
    // First pass: clear traversal state and zero every counter. This must
    // finish before counting, otherwise counts written early would be wiped.
    for (size_t i = 0; i < vertices_.size(); ++i) {
        vertices_[i].state = VertexState::UNVISITED;
        vertices_[i].depCount = 0;
    }

    // Second pass: each outgoing edge adds one dependency to its target.
    for (const auto &parent : vertices_) {
        for (const auto childId : parent.children) {
            ++vertices_[childId].depCount;
        }
    }
}
// True when every vertex is in the VISITED state (trivially true for an
// empty graph).
bool DAG::allVisited() const {
    for (size_t i = 0; i < vertices_.size(); ++i) {
        if (vertices_[i].state == VertexState::VISITED) continue;
        return false;  // found a vertex that has not completed
    }
    return true;
}
// Finds the lowest-indexed vertex that is UNVISITED and has a depCount of
// zero, marks it VISITING and returns its index. Returns an empty optional
// when no vertex currently qualifies.
std::optional<const size_t > DAG::visitNext() {
    size_t idx = 0;
    for (auto &vertex : vertices_) {
        const bool ready = vertex.state == VertexState::UNVISITED && vertex.depCount == 0;
        if (ready) {
            vertex.state = VertexState::VISITING;
            return idx;
        }
        ++idx;
    }
    return std::nullopt;
}
// Marks vertex `id` as VISITED and decrements the depCount of each of its
// children.
void DAG::completeVisit(const size_t id) {
    vertices_[id].state = VertexState::VISITED;
    for (const auto childId : vertices_[id].children) {
        --vertices_[childId].depCount;
    }
}

View File

@@ -0,0 +1,21 @@
#pragma once
#include <string>
#include <unordered_map>
#include <variant>
#include "DAG.hpp"
#include "Task.hpp"
#include "AttemptRecord.hpp"
namespace daggy {
using ParameterValue = std::variant<std::string, std::vector<std::string>>;
using TaskRun = std::vector<AttemptRecord>;
// State bundle for a single run of a DAG: the tasks, the parameters supplied
// for the run, the graph itself, and the attempts recorded per task.
struct DAGRun {
// Tasks that make up the run.
std::vector<Task> tasks;
// Named parameters; each value is either a single string or a list of strings.
std::unordered_map<std::string, ParameterValue> parameters;
// Graph over the tasks. Scheduler::scheduleDAG adds one vertex per task when
// the caller does not supply a DAG.
DAG dag;
// Attempt history (a vector of AttemptRecord) for each task.
// Scheduler::scheduleDAG sizes this to tasks.size().
std::vector<TaskRun> taskRuns;
};
}

View File

@@ -7,7 +7,7 @@
#include <vector>
#include "Task.hpp"
#include "AttemptRecord.hpp"
/*
Executors run Tasks, returning a future with the results.
@@ -15,20 +15,10 @@
*/
namespace daggy {
using Clock = std::chrono::system_clock;
// Result of one attempt at running a task's command.
// NOTE(review): rc has no initializer, so a default-initialized record
// (`AttemptRecord rec;`) carries an indeterminate rc until it is assigned.
struct AttemptRecord {
// Start/stop timestamps of the attempt, taken from Clock (system_clock).
std::chrono::time_point<Clock> startTime;
std::chrono::time_point<Clock> stopTime;
int rc; // RC from the task
std::string metaLog; // Logs from the executor
std::string output; // stdout from command
std::string error; // stderr from command
};
class Executor {
public:
Executor() = default;
virtual const std::string getName() const = 0;
// This will block if the executor is full

View File

@@ -2,6 +2,8 @@
#include <string>
#include "DAGRun.hpp"
/*
MetaStore represents the interface to store all the state information
for daggy to run. Abstracted in case other back-end solutions need to
@@ -9,17 +11,31 @@
*/
namespace daggy {
using DAGDefID = int16_t; // future proofing
// This struct will contain transitions for
struct DAGRunEvent { };
using DAGDefID = int16_t;
using DAGRunID = size_t;
// Abstract interface for persisting DAG definitions and DAG run state.
// Every member is pure virtual; concrete back-ends implement them.
//
// NOTE(review): there is no access specifier, so in a `class` all of these
// members are private and cannot be called through a MetaStore reference --
// confirm whether `public:` is intended.
// NOTE(review): no virtual destructor is declared; deleting a back-end
// through a MetaStore pointer would be undefined behaviour -- confirm.
class MetaStore {
// Basic storage + retrieval of DAG Definitions
// NOTE(review): the next two declarations differ only in return type and
// cannot coexist as overloads; this looks like the old and new lines of the
// same declaration shown together -- keep only the DAGDefID-returning one.
virtual void storeDAGDefinition(std::string name, std::string definition) = 0;
virtual DAGDefID storeDAGDefinition(std::string name, std::string definition) = 0;
// Returns a DAGDefID for the named DAG.
virtual DAGDefID getCurrentDAGVersion(std::string name) = 0;
// Returns the stored definition text for `name`.
// Presumably version == -1 (the default) selects the current version --
// TODO confirm against the implementation.
virtual std::string getDAGDefinition(std::string name, DAGDefID version = -1) = 0;
// DAG Run State
/*
* startDAGRun // DAG starts up, returns a DAGID for future updates
* updateDAGRun // DAG State transitions
* updateTaskState // Task state updates
*/
// Registers a run of `dagName` at `version` and returns the DAGRunID used by
// the update calls below.
virtual DAGRunID startDAGRun(std::string dagName, DAGDefID version, DAGRun dagRun
) = 0;
// Records a new VertexState for the task named `taskName` in run `rid`.
// NOTE(review): the summary comment above calls this updateTaskState.
virtual void updateTask(DAGRunID rid, std::string taskName, VertexState state) = 0;
// Records a new DAGState for run `rid`.
virtual void updateDAGRun(DAGRunID rid, DAGState state) = 0;
// Retrievals
};
}

View File

@@ -7,14 +7,10 @@
#include "DAG.hpp"
#include "Executor.hpp"
#include "DAGRun.hpp"
#include "ThreadPool.hpp"
namespace daggy {
using ParameterValue = std::variant<std::string, std::vector<std::string>>;
using TaskRun = std::vector<AttemptRecord>;
class Scheduler {
public:
enum class DAGState : uint32_t {
UNKNOWN = 0,
QUEUED,
@@ -23,25 +19,21 @@ namespace daggy {
COMPLETE
};
class Scheduler {
public:
public:
Scheduler(
Executor & executor
, size_t executorThreads = 30
, size_t schedulerThreads = 10);
Executor &executor, size_t executorThreads = 30, size_t schedulerThreads = 10);
~Scheduler();
// returns DagRun ID
std::future<void>
scheduleDAG(std::string runName
, std::vector<Task> tasks
, std::unordered_map<std::string, ParameterValue> parameters
, DAG dag = {} // Allows for loading of an existing DAG
scheduleDAG(std::string runName, std::vector<Task> tasks,
std::unordered_map<std::string, ParameterValue> parameters,
DAG dag = {} // Allows for loading of an existing DAG
);
// get the current status of a DAG
DAGState dagRunStatus(std::string runName);
// get the current DAG
DAG dagRunState();
@@ -49,21 +41,13 @@ namespace daggy {
void drain();
private:
void runDAG(const std::string &name, DAGRun &dagRun);
// Per-run state kept by the scheduler: tasks, run parameters, the graph, and
// the attempts recorded for each task.
// NOTE(review): the std::mutex member makes this struct non-copyable and
// non-movable; the namespace-level DAGRun in DAGRun.hpp has no such member.
struct DAGRun {
// Tasks that make up the run.
std::vector<Task> tasks;
// Named parameters; each value is a string or a list of strings.
std::unordered_map<std::string, ParameterValue> parameters;
// Graph over the tasks.
DAG dag;
// Attempt history for each task.
std::vector<TaskRun> taskRuns;
// presumably guards concurrent updates to the members above -- no use of it
// is visible here; TODO confirm.
std::mutex taskGuard_;
};
void runDAG(const std::string & name, DAGRun & dagRun);
std::vector<AttemptRecord> runTask(const Task & task);
std::vector<AttemptRecord> runTask(const Task &task);
std::unordered_map<std::string, DAGRun> runs_;
std::vector<std::future<void>> futs_;
Executor & executor_;
Executor &executor_;
ThreadPool schedulers_;
ThreadPool executors_;
std::unordered_map<std::string, std::future<void>> jobs;

View File

@@ -10,9 +10,7 @@ namespace daggy {
class Server {
public:
Server(Pistache::Address addr)
: endpoint_(addr)
, desc_("Daggy API", "0.1")
{}
: endpoint_(addr), desc_("Daggy API", "0.1") {}
void init(int threads = 1);
@@ -25,22 +23,25 @@ namespace daggy {
// DAG Definition handlers
//
void listDAGs(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void upsertDAG(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void deleteDAG(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void getDAG(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void listDAGs(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void upsertDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void deleteDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void getDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
//
// DAG Runs
//
void runDAG(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void runDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
// List
void getDAGRuns(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void getDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
// Get status of specific run
void getDAGRun(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
void getDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
Pistache::Http::Endpoint endpoint_;
Pistache::Rest::Description desc_;

View File

@@ -24,7 +24,7 @@ namespace daggy {
class TaskQueue {
public:
template<class F, class... Args>
decltype(auto) addTask(F&& f, Args&&... args) {
decltype(auto) addTask(F &&f, Args &&... args) {
// using return_type = std::invoke_result<F, Args...>::type;
using return_type = std::invoke_result_t<F, Args...>;
@@ -58,7 +58,7 @@ namespace daggy {
}
private:
std::queue< std::packaged_task<void()> > tasks_;
std::queue<std::packaged_task<void()> > tasks_;
std::mutex mtx_;
};
@@ -66,10 +66,7 @@ namespace daggy {
public:
explicit ThreadPool(size_t nWorkers)
:
tqit_(taskQueues_.begin())
, stop_(false)
, drain_(false)
{
tqit_(taskQueues_.begin()), stop_(false), drain_(false) {
resize(nWorkers);
}
@@ -78,7 +75,7 @@ namespace daggy {
void shutdown() {
stop_ = true;
cv_.notify_all();
for (std::thread& worker : workers_) {
for (std::thread &worker : workers_) {
if (worker.joinable())
worker.join();
}
@@ -104,15 +101,15 @@ namespace daggy {
workers_.clear();
stop_ = false;
for(size_t i = 0;i< nWorkers;++i)
workers_.emplace_back( [&] {
for (size_t i = 0; i < nWorkers; ++i)
workers_.emplace_back([&] {
while (true) {
std::packaged_task<void()> task;
{
std::unique_lock<std::mutex> lock(mtx_);
cv_.wait(lock, [&]{ return stop_ || ! taskQueues_.empty(); });
if(taskQueues_.empty()) {
if(stop_) return;
cv_.wait(lock, [&] { return stop_ || !taskQueues_.empty(); });
if (taskQueues_.empty()) {
if (stop_) return;
continue;
}
if (tqit_ == taskQueues_.end()) tqit_ = taskQueues_.begin();
@@ -130,7 +127,7 @@ namespace daggy {
};
template<class F, class... Args>
decltype(auto) addTask(F&& f, Args&&... args) {
decltype(auto) addTask(F &&f, Args &&... args) {
if (drain_) throw std::runtime_error("Unable to add task to draining pool");
auto tq = std::make_shared<TaskQueue>();
@@ -153,7 +150,7 @@ namespace daggy {
private:
// need to keep track of threads so we can join them
std::vector< std::thread > workers_;
std::vector<std::thread> workers_;
// the task queue
std::list<std::shared_ptr<TaskQueue>> taskQueues_;
std::list<std::shared_ptr<TaskQueue>>::iterator tqit_;

View File

@@ -8,6 +8,7 @@ namespace daggy {
class ForkingExecutor : public Executor {
public:
ForkingExecutor() = default;
const std::string getName() const override { return "ForkingExecutor"; }
AttemptRecord runCommand(std::vector<std::string> cmd) override;

View File

@@ -3,6 +3,7 @@
namespace daggy {
size_t DAG::size() const { return vertices_.size(); }
bool DAG::empty() const { return vertices_.empty(); }
size_t DAG::addVertex() {
@@ -27,7 +28,7 @@ namespace daggy {
bool DAG::hasPath(const size_t from, const size_t to) const {
if (from >= vertices_.size()) throw std::runtime_error("No such vertex " + std::to_string(from));
if (to >= vertices_.size()) throw std::runtime_error("No such vertex " + std::to_string(to));
for (const auto & child : vertices_[from].children) {
for (const auto &child : vertices_[from].children) {
if (child == to) return true;
if (hasPath(child, to)) return true;
}
@@ -37,13 +38,13 @@ namespace daggy {
void DAG::reset() {
// Reset the state of all vertices
for (auto & v : vertices_) {
for (auto &v : vertices_) {
v.state = VertexState::UNVISITED;
v.depCount = 0;
}
// Calculate the upstream count
for (auto & v : vertices_) {
for (auto &v : vertices_) {
for (auto c : v.children) {
++vertices_[c].depCount;
}
@@ -51,7 +52,7 @@ namespace daggy {
}
bool DAG::allVisited() const {
for (const auto & v : vertices_) {
for (const auto &v : vertices_) {
if (v.state != VertexState::VISITED) return false;
}
return true;
@@ -59,7 +60,7 @@ namespace daggy {
std::optional<const size_t> DAG::visitNext() {
for (size_t i = 0; i < vertices_.size(); ++i) {
auto & v = vertices_[i];
auto &v = vertices_[i];
if (v.state != VertexState::UNVISITED) continue;
if (v.depCount != 0) continue;
@@ -70,7 +71,7 @@ namespace daggy {
}
void DAG::completeVisit(const size_t id) {
auto & v = vertices_[id];
auto &v = vertices_[id];
v.state = VertexState::VISITED;
for (auto c : v.children) {
--vertices_[c].depCount;

View File

@@ -3,13 +3,8 @@
using namespace std::chrono_literals;
namespace daggy {
Scheduler::Scheduler(Executor & executor
, size_t executorThreads
, size_t schedulerThreads)
: executor_(executor)
, schedulers_(schedulerThreads)
, executors_(executorThreads)
{ }
Scheduler::Scheduler(Executor &executor, size_t executorThreads, size_t schedulerThreads)
: executor_(executor), schedulers_(schedulerThreads), executors_(executorThreads) {}
Scheduler::~Scheduler() {
@@ -18,25 +13,22 @@ namespace daggy {
}
std::future<void>
Scheduler::scheduleDAG(std::string runName
, std::vector<Task> tasks
, std::unordered_map<std::string, ParameterValue> parameters
, DAG dag
)
{
// Initialize the dag
Scheduler::scheduleDAG(std::string runName, std::vector<Task> tasks,
std::unordered_map<std::string, ParameterValue> parameters, DAG dag
) {
// Initialize the dag if one wasn't provided
if (dag.empty()) {
std::unordered_map<std::string, size_t> tids;
std::unordered_map<std::string, size_t> taskIDs;
// Add all the vertices
for (size_t i = 0; i < tasks.size(); ++i) {
tids[tasks[i].name] = dag.addVertex();
for (const auto &task : tasks) {
taskIDs[task.name] = dag.addVertex();
}
// Add edges
for (size_t i = 0; i < tasks.size(); ++i) {
for (const auto & c : tasks[i].children) {
dag.addEdge(i, tids[c]);
for (const auto &c : tasks[i].children) {
dag.addEdge(i, taskIDs[c]);
}
}
dag.reset();
@@ -44,10 +36,10 @@ namespace daggy {
// Create the DAGRun
std::lock_guard<std::mutex> guard(mtx_);
auto & dr = runs_[runName];
auto &dr = runs_[runName];
dr.tasks = tasks;
dr.parameters = parameters;
dr.parameters = std::move(parameters);
dr.dag = dag;
dr.taskRuns = std::vector<TaskRun>{tasks.size()};
@@ -55,20 +47,19 @@ namespace daggy {
return std::move(schedulers_.addTask([&]() { runDAG(runName, dr); }));
}
void Scheduler::runDAG(const std::string & name, DAGRun & run)
{
struct Task {
void Scheduler::runDAG(const std::string &name, DAGRun &run) {
struct TaskState {
size_t tid;
std::future<std::vector<AttemptRecord>> fut;
bool complete;
};
std::vector<Task> tasks;
std::vector<TaskState> tasks;
while (! run.dag.allVisited()) {
while (!run.dag.allVisited()) {
// Check for any completed tasks
for (auto & task : tasks) {
for (auto &task : tasks) {
if (task.complete) continue;
if (task.fut.valid()) {
@@ -80,13 +71,13 @@ namespace daggy {
}
}
// Get the next dag to run
// Add all remaining tasks in a task queue to avoid dominating the thread pool
auto tq = std::make_shared<TaskQueue>();
auto t = run.dag.visitNext();
while (t.has_value()) {
// Schedule the task to run
Task tsk{ .tid = t.value()
, .fut = executors_.addTask([&](){return runTask(run.tasks[t.value()]);})
, .complete = false
TaskState tsk{.tid = t.value(), .fut = tq->addTask(
[&]() { return runTask(run.tasks[t.value()]); }), .complete = false
};
tasks.push_back(std::move(tsk));
@@ -95,13 +86,16 @@ namespace daggy {
if (not nt.has_value()) break;
t.emplace(nt.value());
}
if (! tq->empty()) {
executors_.addTasks(tq);
}
std::this_thread::sleep_for(250ms);
}
}
std::vector<AttemptRecord>
Scheduler::runTask(const Task & task) {
Scheduler::runTask(const Task &task) {
std::vector<AttemptRecord> attempts;
while (attempts.size() < task.max_retries) {

View File

@@ -5,8 +5,7 @@ using namespace Pistache;
namespace daggy {
void Server::init(int threads) {
auto opts = Http::Endpoint::options()
.threads(threads)
;
.threads(threads);
endpoint_.init(opts);
createDescription();
}
@@ -21,11 +20,11 @@ namespace daggy {
void Server::createDescription() {
desc_
.info()
.license("Apache", "http://www.apache.org/licenses/LICENSE-2.0")
;
.license("Apache", "http://www.apache.org/licenses/LICENSE-2.0");
auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error, "An error occured with the backend");
auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error,
"An error occured with the backend");
desc_
.schemes(Rest::Scheme::Http)

View File

@@ -16,7 +16,7 @@ std::string slurp(int fd) {
const ssize_t BUFFER_SIZE = 4096;
char buffer[BUFFER_SIZE];
struct pollfd pfd{ .fd = fd, .events = POLLIN, .revents = 0 };
struct pollfd pfd{.fd = fd, .events = POLLIN, .revents = 0};
poll(&pfd, 1, 1);
while (pfd.revents & POLLIN) {
@@ -34,22 +34,23 @@ std::string slurp(int fd) {
}
daggy::AttemptRecord
ForkingExecutor::runCommand(std::vector<std::string> cmd)
{
ForkingExecutor::runCommand(std::vector<std::string> cmd) {
AttemptRecord rec;
rec.startTime = Clock::now();
// Need to convert the strings
std::vector<char *> argv;
for (const auto & s : cmd) {
for (const auto &s : cmd) {
argv.push_back(const_cast<char *>(s.c_str()));
}
argv.push_back(nullptr);
// Create the pipe
int stdoutPipe[2]; pipe2(stdoutPipe, O_DIRECT);
int stderrPipe[2]; pipe2(stderrPipe, O_DIRECT);
int stdoutPipe[2];
pipe2(stdoutPipe, O_DIRECT);
int stderrPipe[2];
pipe2(stderrPipe, O_DIRECT);
pid_t child = fork();
if (child < 0) {
@@ -64,8 +65,8 @@ daggy::AttemptRecord
}
std::atomic<bool> running = true;
std::thread stdoutReader([&]() { while(running) rec.output.append(slurp(stdoutPipe[0])); });
std::thread stderrReader([&]() { while(running) rec.error.append(slurp(stderrPipe[0])); });
std::thread stdoutReader([&]() { while (running) rec.output.append(slurp(stdoutPipe[0])); });
std::thread stderrReader([&]() { while (running) rec.error.append(slurp(stderrPipe[0])); });
int rc = 0;
waitpid(child, &rc, 0);

View File

@@ -17,7 +17,9 @@ TEST_CASE("Basic Scheduler Execution", "[scheduler]") {
};
SECTION("Simple Run") {
auto fut = sched.scheduleDAG("Simple", tasks, {});
fut.get();
auto fut_a = sched.scheduleDAG("Simple 1", tasks, {});
auto fut_b = sched.scheduleDAG("Simple 2", tasks, {});
fut_a.get();
fut_b.get();
}
}