* Formatting code with clang-tidy
* Roughing in more metastore work
daggy/include/daggy/AttemptRecord.hpp (new file, 17 lines)
@@ -0,0 +1,17 @@
#pragma once

#include <chrono>
#include <string>

namespace daggy {
  using Clock = std::chrono::system_clock;

  struct AttemptRecord {
    std::chrono::time_point<Clock> startTime;
    std::chrono::time_point<Clock> stopTime;
    int rc;               // RC from the task
    std::string metaLog;  // Logs from the executor
    std::string output;   // stdout from command
    std::string error;    // stderr from command
  };
}
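For orientation, a minimal sketch of how an executor might fill in one of these records around a task attempt. The include path and the run callable are assumptions for illustration, not part of this commit.

#include <string>
#include <utility>
#include "daggy/AttemptRecord.hpp"  // path assumed relative to daggy/include

// Sketch: wrap a callable that runs the task and yields {rc, stdout, stderr}.
template <typename RunFn>
daggy::AttemptRecord recordAttempt(RunFn &&run) {
  daggy::AttemptRecord rec;
  rec.startTime = daggy::Clock::now();   // wall-clock start of the attempt
  auto [rc, out, err] = run();           // assumed to return an {int, string, string} tuple
  rec.rc = rc;                           // RC from the task
  rec.output = std::move(out);           // stdout from the command
  rec.error = std::move(err);            // stderr from the command
  rec.metaLog = "attempt finished";      // executor-side note
  rec.stopTime = daggy::Clock::now();    // wall-clock end of the attempt
  return rec;
}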
@@ -34,24 +34,32 @@ namespace daggy {
  public:
    // Vertices
    size_t addVertex();

    const std::vector<Vertex> &getVertices();

    // Edges
    void addEdge(const size_t src, const size_t dst);

    void dropEdge(const size_t src, const size_t dst);

    bool hasPath(const size_t from, const size_t to) const;

    const std::vector<Edge> &getEdges();

    // Attributes
    size_t size() const;

    bool empty() const;

    // Traversal
    void reset();

    VertexState getVertexState(const size_t id) const;

    bool allVisited() const;

    std::optional<const size_t> visitNext();

    void completeVisit(const size_t id);

  private:
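The traversal methods above are meant to be driven as a ready-set loop: reset() computes dependency counts, visitNext() hands out a vertex whose dependencies are all finished, and completeVisit() releases its children. A minimal sketch, assuming the include path and that addVertex() returns an ID usable with addEdge():

#include "daggy/DAG.hpp"  // path assumed

void walkExample() {
  daggy::DAG dag;
  auto a = dag.addVertex();
  auto b = dag.addVertex();
  auto c = dag.addVertex();
  dag.addEdge(a, b);   // b depends on a
  dag.addEdge(a, c);   // c depends on a
  dag.reset();         // clear visit state, recompute dependency counts

  while (!dag.allVisited()) {
    auto next = dag.visitNext();   // a vertex with no unfinished dependencies
    if (!next.has_value()) break;  // nothing ready; the real scheduler waits here
    // ... run the task associated with next.value() ...
    dag.completeVisit(next.value());
  }
}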
@@ -1,70 +0,0 @@
  size_t DAG::size() const { return vertices_.size(); }
  bool DAG::empty() const { return vertices_.empty(); }

  size_t DAG::addVertex() {
    vertices_.push_back(Vertex{.state = VertexState::UNVISITED, .depCount = 0});
    return vertices_.size();
  }

  void DAG::dropEdge(const size_t from, const size_t to) {
    vertices_[from].children.extract(to);
  }

  void DAG::addEdge(const size_t from, const size_t to) {
    if (hasPath(to, from))
      throw std::runtime_error("Adding edge would result in a cycle");
    vertices_[from].children.insert(to);
  }

  bool DAG::hasPath(const size_t from, const size_t to) const {
    bool pathFound = false;

    for (const auto & child : vertices_[from].children) {
      if (child == to) return true;
      if (hasPath(child, to)) return true;
    }

    return false;
  }

  void DAG::reset() {
    // Reset the state of all vertices
    for (auto & v : vertices_) {
      v.state = VertexState::UNVISITED;
      v.depCount = 0;
    }

    // Calculate the upstream count
    for (auto & v : vertices_) {
      for (auto c : v.children) {
        ++vertices_[c].depCount;
      }
    }
  }

  bool DAG::allVisited() const {
    for (const auto & v : vertices_) {
      if (v.state != VertexState::VISITED) return false;
    }
    return true;
  }

  std::optional<const size_t> DAG::visitNext() {
    for (size_t i = 0; i < vertices_.size(); ++i) {
      auto & v = vertices_[i];

      if (v.state != VertexState::UNVISITED) continue;
      if (v.depCount != 0) continue;
      v.state = VertexState::VISITING;
      return i;
    }
    return {};
  }

  void DAG::completeVisit(const size_t id) {
    auto & v = vertices_[id];
    v.state = VertexState::VISITED;
    for (auto c : v.children) {
      --vertices_[c].depCount;
    }
  }
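One behavior worth calling out from the implementation above: addEdge() refuses to create a cycle by first checking hasPath() in the reverse direction. A small illustrative check, again assuming addVertex() returns IDs valid for addEdge():

#include <iostream>
#include <stdexcept>
#include "daggy/DAG.hpp"  // path assumed

void cycleExample() {
  daggy::DAG dag;
  auto x = dag.addVertex();
  auto y = dag.addVertex();
  dag.addEdge(x, y);            // x -> y is fine
  try {
    dag.addEdge(y, x);          // y -> x would close a cycle
  } catch (const std::runtime_error &e) {
    std::cerr << "rejected: " << e.what() << '\n';   // "Adding edge would result in a cycle"
  }
}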
daggy/include/daggy/DAGRun.hpp (new file, 21 lines)
@@ -0,0 +1,21 @@
#pragma once

#include <string>
#include <unordered_map>
#include <variant>

#include "DAG.hpp"
#include "Task.hpp"
#include "AttemptRecord.hpp"

namespace daggy {
  using ParameterValue = std::variant<std::string, std::vector<std::string>>;
  using TaskRun = std::vector<AttemptRecord>;

  struct DAGRun {
    std::vector<Task> tasks;
    std::unordered_map<std::string, ParameterValue> parameters;
    DAG dag;
    std::vector<TaskRun> taskRuns;
  };
}
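ParameterValue lets a parameter carry either a single string or a list of strings. A self-contained sketch of reading one back out (the map contents and function name are illustrative):

#include <iostream>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>

using ParameterValue = std::variant<std::string, std::vector<std::string>>;

void printParameter(const std::unordered_map<std::string, ParameterValue> &params,
                    const std::string &key) {
  const auto &value = params.at(key);
  if (std::holds_alternative<std::string>(value)) {
    std::cout << key << " = " << std::get<std::string>(value) << '\n';
  } else {
    for (const auto &item : std::get<std::vector<std::string>>(value)) {
      std::cout << key << "[] = " << item << '\n';
    }
  }
}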
@@ -7,7 +7,7 @@
#include <vector>

#include "Task.hpp"

#include "AttemptRecord.hpp"

/*
Executors run Tasks, returning a future with the results.
@@ -15,20 +15,10 @@
*/

namespace daggy {
  using Clock = std::chrono::system_clock;

  struct AttemptRecord {
    std::chrono::time_point<Clock> startTime;
    std::chrono::time_point<Clock> stopTime;
    int rc;               // RC from the task
    std::string metaLog;  // Logs from the executor
    std::string output;   // stdout from command
    std::string error;    // stderr from command
  };

  class Executor {
  public:
    Executor() = default;

    virtual const std::string getName() const = 0;

    // This will block if the executor is full
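Given the interface sketched above (getName(), plus the runCommand() override visible on ForkingExecutor further down), a custom executor would look roughly like this. Treat it as a sketch: the full base class is not shown in this hunk, and the include path is assumed.

#include <string>
#include <vector>
#include "daggy/Executor.hpp"  // path assumed

namespace daggy {
  // Illustrative executor that pretends every command succeeds instantly.
  class NullExecutor : public Executor {
  public:
    const std::string getName() const override { return "NullExecutor"; }

    AttemptRecord runCommand(std::vector<std::string> cmd) override {
      (void)cmd;                       // deliberately unused
      AttemptRecord rec;
      rec.startTime = Clock::now();
      rec.rc = 0;                      // report success without running anything
      rec.metaLog = "NullExecutor: command skipped";
      rec.stopTime = Clock::now();
      return rec;
    }
  };
}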
@@ -2,6 +2,8 @@

#include <string>

#include "DAGRun.hpp"

/*
MetaStore represents the interface to store all the state information
for daggy to run. Abstracted in case other back-end solutions need to
@@ -9,17 +11,31 @@
*/

namespace daggy {
  using DAGDefID = int16_t; // future proofing

  // This struct will contain transitions for
  struct DAGRunEvent { };
  using DAGDefID = int16_t;
  using DAGRunID = size_t;

  class MetaStore {
    // Basic storage + retrieval of DAG Definitions
    virtual void storeDAGDefinition(std::string name, std::string definition) = 0;
    virtual DAGDefID storeDAGDefinition(std::string name, std::string definition) = 0;

    virtual DAGDefID getCurrentDAGVersion(std::string name) = 0;

    virtual std::string getDAGDefinition(std::string name, DAGDefID version = -1) = 0;

    // DAG Run State

    /*
     * startDAGRun     // DAG starts up, returns a DAGID for future updates
     * updateDAGRun    // DAG State transitions
     * updateTaskState // Task state updates
     */
    virtual DAGRunID startDAGRun(std::string dagName, DAGDefID version, DAGRun dagRun
                                 ) = 0;

    virtual void updateTask(DAGRunID rid, std::string taskName, VertexState state) = 0;

    virtual void updateDAGRun(DAGRunID rid, DAGState state) = 0;

    // Retrievals
  };
}
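To make the definition-storage half of this interface concrete, here is a purely illustrative in-memory take on it. It mirrors the method shapes from the hunk above rather than deriving from the real class, and treats a version as an index into a per-name history; none of this is part of the commit.

#include <string>
#include <unordered_map>
#include <vector>

class InMemoryDAGDefinitions {
public:
  using DAGDefID = int16_t;

  DAGDefID storeDAGDefinition(std::string name, std::string definition) {
    auto &versions = defs_[name];
    versions.push_back(std::move(definition));
    return static_cast<DAGDefID>(versions.size() - 1);   // ID of the newly stored version
  }

  DAGDefID getCurrentDAGVersion(std::string name) {
    return static_cast<DAGDefID>(defs_.at(name).size() - 1);
  }

  std::string getDAGDefinition(std::string name, DAGDefID version = -1) {
    const auto &versions = defs_.at(name);
    if (version < 0) return versions.back();              // -1 selects the latest version
    return versions.at(static_cast<size_t>(version));
  }

private:
  std::unordered_map<std::string, std::vector<std::string>> defs_;
};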
@@ -7,14 +7,10 @@

#include "DAG.hpp"
#include "Executor.hpp"
#include "DAGRun.hpp"
#include "ThreadPool.hpp"

namespace daggy {
  using ParameterValue = std::variant<std::string, std::vector<std::string>>;
  using TaskRun = std::vector<AttemptRecord>;

  class Scheduler {
  public:
    enum class DAGState : uint32_t {
      UNKNOWN = 0,
      QUEUED,
@@ -23,25 +19,21 @@
      COMPLETE
    };

  class Scheduler {
  public:
  public:
    Scheduler(
        Executor & executor
        , size_t executorThreads = 30
        , size_t schedulerThreads = 10);
        Executor &executor, size_t executorThreads = 30, size_t schedulerThreads = 10);

    ~Scheduler();

    // returns DagRun ID
    std::future<void>
    scheduleDAG(std::string runName
                , std::vector<Task> tasks
                , std::unordered_map<std::string, ParameterValue> parameters
                , DAG dag = {} // Allows for loading of an existing DAG
    scheduleDAG(std::string runName, std::vector<Task> tasks,
                std::unordered_map<std::string, ParameterValue> parameters,
                DAG dag = {} // Allows for loading of an existing DAG
    );

    // get the current status of a DAG
    DAGState dagRunStatus(std::string runName);

    // get the current DAG
    DAG dagRunState();
@@ -49,16 +41,8 @@
    void drain();

  private:

    struct DAGRun {
      std::vector<Task> tasks;
      std::unordered_map<std::string, ParameterValue> parameters;
      DAG dag;
      std::vector<TaskRun> taskRuns;
      std::mutex taskGuard_;
    };

    void runDAG(const std::string &name, DAGRun &dagRun);

    std::vector<AttemptRecord> runTask(const Task &task);

    std::unordered_map<std::string, DAGRun> runs_;
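Putting the header together, driving the scheduler looks roughly like the sketch below. It mirrors the test case at the bottom of this commit; the include paths are assumptions, and the thread counts fall back to the header defaults.

#include <vector>
#include "daggy/ForkingExecutor.hpp"  // path assumed
#include "daggy/Scheduler.hpp"        // path assumed

void runExample(std::vector<daggy::Task> tasks) {
  daggy::ForkingExecutor executor;
  daggy::Scheduler sched(executor);   // defaults: 30 executor threads, 10 scheduler threads

  // The returned future completes once every task in the DAG has run.
  auto fut = sched.scheduleDAG("example-run", tasks, {});
  fut.get();

  auto state = sched.dagRunStatus("example-run");   // expect COMPLETE at this point
  (void)state;
}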
@@ -10,9 +10,7 @@ namespace daggy {
  class Server {
  public:
    Server(Pistache::Address addr)
        : endpoint_(addr)
        , desc_("Daggy API", "0.1")
    {}
        : endpoint_(addr), desc_("Daggy API", "0.1") {}

    void init(int threads = 1);

@@ -26,8 +24,11 @@
    //

    void listDAGs(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);

    void upsertDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);

    void deleteDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);

    void getDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);

    //
@@ -66,10 +66,7 @@
  public:
    explicit ThreadPool(size_t nWorkers)
        :
          tqit_(taskQueues_.begin())
        , stop_(false)
        , drain_(false)
    {
          tqit_(taskQueues_.begin()), stop_(false), drain_(false) {
      resize(nWorkers);
    }
@@ -8,6 +8,7 @@ namespace daggy {
  class ForkingExecutor : public Executor {
  public:
    ForkingExecutor() = default;

    const std::string getName() const override { return "ForkingExecutor"; }

    AttemptRecord runCommand(std::vector<std::string> cmd) override;
@@ -3,6 +3,7 @@

namespace daggy {
  size_t DAG::size() const { return vertices_.size(); }

  bool DAG::empty() const { return vertices_.empty(); }

  size_t DAG::addVertex() {
@@ -3,13 +3,8 @@
using namespace std::chrono_literals;

namespace daggy {
  Scheduler::Scheduler(Executor & executor
                       , size_t executorThreads
                       , size_t schedulerThreads)
      : executor_(executor)
      , schedulers_(schedulerThreads)
      , executors_(executorThreads)
  { }
  Scheduler::Scheduler(Executor &executor, size_t executorThreads, size_t schedulerThreads)
      : executor_(executor), schedulers_(schedulerThreads), executors_(executorThreads) {}


  Scheduler::~Scheduler() {
@@ -18,25 +13,22 @@
  }

  std::future<void>
  Scheduler::scheduleDAG(std::string runName
                         , std::vector<Task> tasks
                         , std::unordered_map<std::string, ParameterValue> parameters
                         , DAG dag
                         )
  {
    // Initialize the dag
  Scheduler::scheduleDAG(std::string runName, std::vector<Task> tasks,
                         std::unordered_map<std::string, ParameterValue> parameters, DAG dag
  ) {
    // Initialize the dag if one wasn't provided
    if (dag.empty()) {
      std::unordered_map<std::string, size_t> tids;
      std::unordered_map<std::string, size_t> taskIDs;

      // Add all the vertices
      for (size_t i = 0; i < tasks.size(); ++i) {
        tids[tasks[i].name] = dag.addVertex();
      for (const auto &task : tasks) {
        taskIDs[task.name] = dag.addVertex();
      }

      // Add edges
      for (size_t i = 0; i < tasks.size(); ++i) {
        for (const auto &c : tasks[i].children) {
          dag.addEdge(i, tids[c]);
          dag.addEdge(i, taskIDs[c]);
        }
      }
      dag.reset();
@@ -47,7 +39,7 @@
    auto &dr = runs_[runName];

    dr.tasks = tasks;
    dr.parameters = parameters;
    dr.parameters = std::move(parameters);
    dr.dag = dag;
    dr.taskRuns = std::vector<TaskRun>{tasks.size()};

@@ -55,15 +47,14 @@
    return std::move(schedulers_.addTask([&]() { runDAG(runName, dr); }));
  }

  void Scheduler::runDAG(const std::string & name, DAGRun & run)
  {
    struct Task {
  void Scheduler::runDAG(const std::string &name, DAGRun &run) {
    struct TaskState {
      size_t tid;
      std::future<std::vector<AttemptRecord>> fut;
      bool complete;
    };

    std::vector<Task> tasks;
    std::vector<TaskState> tasks;

    while (!run.dag.allVisited()) {

@@ -80,13 +71,13 @@
      }
      }

      // Get the next dag to run
      // Add all remaining tasks in a task queue to avoid dominating the thread pool
      auto tq = std::make_shared<TaskQueue>();
      auto t = run.dag.visitNext();
      while (t.has_value()) {
        // Schedule the task to run
        Task tsk{ .tid = t.value()
                  , .fut = executors_.addTask([&](){return runTask(run.tasks[t.value()]);})
                  , .complete = false
        TaskState tsk{.tid = t.value(), .fut = tq->addTask(
                      [&]() { return runTask(run.tasks[t.value()]); }), .complete = false
        };
        tasks.push_back(std::move(tsk));

@@ -95,6 +86,9 @@
        if (not nt.has_value()) break;
        t.emplace(nt.value());
      }
      if (! tq->empty()) {
        executors_.addTasks(tq);
      }

      std::this_thread::sleep_for(250ms);
    }
@@ -5,8 +5,7 @@ using namespace Pistache;
namespace daggy {
  void Server::init(int threads) {
    auto opts = Http::Endpoint::options()
                    .threads(threads)
        ;
                    .threads(threads);
    endpoint_.init(opts);
    createDescription();
  }
@@ -21,11 +20,11 @@
  void Server::createDescription() {
    desc_
        .info()
        .license("Apache", "http://www.apache.org/licenses/LICENSE-2.0")
        ;
        .license("Apache", "http://www.apache.org/licenses/LICENSE-2.0");


    auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error, "An error occured with the backend");
    auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error,
                                               "An error occured with the backend");

    desc_
        .schemes(Rest::Scheme::Http)
@@ -34,8 +34,7 @@ std::string slurp(int fd) {
}

daggy::AttemptRecord
ForkingExecutor::runCommand(std::vector<std::string> cmd)
{
ForkingExecutor::runCommand(std::vector<std::string> cmd) {
  AttemptRecord rec;

  rec.startTime = Clock::now();
@@ -48,8 +47,10 @@ daggy::AttemptRecord
  argv.push_back(nullptr);

  // Create the pipe
  int stdoutPipe[2]; pipe2(stdoutPipe, O_DIRECT);
  int stderrPipe[2]; pipe2(stderrPipe, O_DIRECT);
  int stdoutPipe[2];
  pipe2(stdoutPipe, O_DIRECT);
  int stderrPipe[2];
  pipe2(stderrPipe, O_DIRECT);

  pid_t child = fork();
  if (child < 0) {
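For readers less familiar with the plumbing being reformatted here: runCommand() follows the usual pipe-then-fork pattern for capturing a child's output. Below is a stripped-down, self-contained version of that pattern for comparison, using plain pipe() rather than the pipe2(..., O_DIRECT) packet mode above and with only minimal error handling; it is illustrative, not the project's implementation.

#include <string>
#include <sys/wait.h>
#include <unistd.h>

// Run `date`, capture its stdout into `out`, and return the exit status.
int runAndCapture(std::string &out) {
  int fds[2];
  if (pipe(fds) != 0) return -1;

  pid_t child = fork();
  if (child < 0) return -1;

  if (child == 0) {                     // child: point stdout at the pipe, then exec
    dup2(fds[1], STDOUT_FILENO);
    close(fds[0]);
    close(fds[1]);
    execlp("date", "date", static_cast<char *>(nullptr));
    _exit(127);                         // only reached if exec failed
  }

  close(fds[1]);                        // parent: read until the child closes its end
  char buf[4096];
  ssize_t n;
  while ((n = read(fds[0], buf, sizeof(buf))) > 0) {
    out.append(buf, static_cast<size_t>(n));
  }
  close(fds[0]);

  int status = 0;
  waitpid(child, &status, 0);
  return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}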
@@ -17,7 +17,9 @@ TEST_CASE("Basic Scheduler Execution", "[scheduler]") {
  };

  SECTION("Simple Run") {
    auto fut = sched.scheduleDAG("Simple", tasks, {});
    fut.get();
    auto fut_a = sched.scheduleDAG("Simple 1", tasks, {});
    auto fut_b = sched.scheduleDAG("Simple 2", tasks, {});
    fut_a.get();
    fut_b.get();
  }
}