Rejigging the DAG traversal so it is a bit more efficient

This commit is contained in:
Ian Roddis
2021-06-11 10:20:15 -03:00
parent 7a6dbb85c2
commit 9cfa8cab7f
6 changed files with 139 additions and 142 deletions

View File

@@ -3,8 +3,8 @@
#include <iostream>
#include <deque>
#include <stdexcept>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <iterator>
#include <functional>
@@ -15,45 +15,45 @@
namespace daggy {
enum class VertexState {
enum class VertexState : uint32_t {
UNVISITED = 0,
VISITING,
VISITED
};
template<typename T>
// A single node of the graph together with its traversal bookkeeping.
struct Vertex {
// Where this vertex currently is in the traversal.
VertexState state;
// Count of incoming edges whose source has not completed yet; recomputed by
// DAG::reset() and decremented by DAG::completeVisit().
uint32_t depCount;
// Indices (into the DAG's vertex vector) of the vertices this one points to.
std::unordered_set<size_t> children;
};
using Edge = std::pair<size_t,size_t>;
class DAG {
public:
DAG() {}
// Vertices
void addVertex(T id, VertexState state = VertexState::UNVISITED);
void dropVertex(const T & id);
std::set<T> getVertices() const;
std::set<T> getParents(const T & id) const;
std::set<T> getChildren(const T & id) const;
size_t addVertex();
const std::vector<Vertex> & getVertices();
// Edges
void addEdge(const T & src, const T & dst);
void dropEdge(const T & src, const T & dst);
bool hasPath(const T & from, const T & to) const;
void addEdge(const size_t src, const size_t dst);
void dropEdge(const size_t src, const size_t dst);
bool hasPath(const size_t from, const size_t to) const;
const std::vector<Edge> & getEdges();
// Attributes
size_t size() const;
bool empty() const;
// Traversal
void setVisitState(VertexState state);
VertexState getVertexState(const T & id) const;
void reset();
VertexState getVertexState(const size_t id) const;
bool allVisited() const;
std::optional<const T> visitNext();
void completeVisit(const T & id);
std::optional<const size_t> visitNext();
void completeVisit(const size_t id);
private:
std::unordered_map<T, VertexState> vertices_;
std::set<std::pair<T, T>> edges_;
std::vector<Vertex> vertices_;
};
#include "DAGImpl.hpp"
}

View File

@@ -1,136 +1,70 @@
template<typename T>
size_t DAG<T>::size() const {
size_t DAG::size() const { return vertices_.size(); }
bool DAG::empty() const { return vertices_.empty(); }
// Appends a new, unvisited vertex with no children and no pending
// dependencies.
//
// Returns the zero-based index of the new vertex, i.e. the id that the other
// DAG members (addEdge, hasPath, completeVisit, ...) index vertices_ with.
size_t DAG::addVertex() {
    vertices_.push_back(Vertex{.state = VertexState::UNVISITED, .depCount = 0});
    // The new vertex is the last element; size() itself would be one past it.
    return vertices_.size() - 1;
}
template<typename T>
bool DAG<T>::empty() const {
return vertices_.empty();
// Removes the edge from -> to.
//
// Unknown source vertices and edges that do not exist are ignored. depCount
// values are not adjusted here; reset() recomputes them from the edges.
void DAG::dropEdge(const size_t from, const size_t to) {
    // Indexing past the end of vertices_ would be undefined behaviour.
    if (from >= vertices_.size()) return;
    // erase() by key is a no-op when the edge is absent and, unlike extract(),
    // does not build a node handle only to throw it away.
    vertices_[from].children.erase(to);
}
template<typename T>
void DAG<T>::addVertex(T id, VertexState state) {
if (vertices_.find(id) != vertices_.end())
throw std::runtime_error("Vertex already exists in graph");
vertices_[id] = state;
}
template<typename T>
void DAG<T>::dropVertex(const T & id) {
vertices_.extract(id);
for (auto it = edges_.begin(); it != edges_.end(); ) {
if (it->first == id or it->second == id) {
it = edges_.erase(it);
} else {
++it;
}
}
}
template<typename T>
void DAG<T>::dropEdge(const T & from, const T & to) {
for (auto it = edges_.begin(); it != edges_.end(); ) {
if (it->first == from and it->second == to) {
it = edges_.erase(it);
break;
} else {
++it;
}
}
}
template<typename T>
void DAG<T>::addEdge(const T & from, const T & to) {
void DAG::addEdge(const size_t from, const size_t to) {
if (hasPath(to, from))
throw std::runtime_error("Adding edge would result in a cycle");
edges_.emplace(from, to);
vertices_[from].children.insert(to);
}
template<typename T>
bool DAG<T>::hasPath(const T & from, const T & to) const {
bool DAG::hasPath(const size_t from, const size_t to) const {
bool pathFound = false;
for (const auto & pr : edges_) {
if (pr.first != from) continue;
if (pr.second == to) return true;
if (hasPath(pr.second, to)) return true;
for (const auto & child : vertices_[from].children) {
if (child == to) return true;
if (hasPath(child, to)) return true;
}
return false;
}
template<typename T>
std::set<T> DAG<T>::getVertices() const {
std::set<T> vertices;
for (const auto & [v, _] : vertices_) {
vertices.insert(v);
void DAG::reset() {
// Reset the state of all vertices
for (auto & v : vertices_) {
v.state = VertexState::UNVISITED;
v.depCount = 0;
}
return vertices;
}
template<typename T>
std::set<T> DAG<T>::getParents(const T & id) const {
std::set<T> parents;
for (const auto & [p, c] : edges_) {
if (c == id) parents.push_back(p);
// Calculate the upstream count
for (auto & v : vertices_) {
for (auto c : v.children) {
++vertices_[c].depCount;
}
}
return parents;
}
template<typename T>
std::set<T> DAG<T>::getChildren(const T & id) const {
std::set<T> children;
for (const auto & [p, c] : edges_) {
if (p == id) children.push_back(c);
}
return children;
}
template<typename T>
void DAG<T>::setVisitState(VertexState state) {
for (auto & [v, s] : vertices_) s = state;
}
template<typename T>
VertexState DAG<T>::getVertexState(const T & id) const {
return vertices_.at(id);
}
template<typename T>
bool DAG<T>::allVisited() const {
for (const auto & [_, s] : vertices_) {
if (s != VertexState::VISITED) return false;
bool DAG::allVisited() const {
for (const auto & v : vertices_) {
if (v.state != VertexState::VISITED) return false;
}
return true;
}
template<typename T>
std::optional<const T> DAG<T>::visitNext() {
for (auto & [v, s] : vertices_) {
if (s != VertexState::UNVISITED) continue;
std::optional<const size_t > DAG::visitNext() {
for (size_t i = 0; i < vertices_.size(); ++i) {
auto & v = vertices_[i];
// check to see if all parents are completed
bool parentsComplete = true;
for (const auto & [p, c] : edges_) {
if (c != v) continue;
if (vertices_[p] != VertexState::VISITED) {
parentsComplete = false;
break;
}
}
if (! parentsComplete) continue;
s = VertexState::VISITING;
return v;
if (v.state != VertexState::UNVISITED) continue;
if (v.depCount != 0) continue;
v.state = VertexState::VISITING;
return i;
}
return {};
}
template<typename T>
void DAG<T>::completeVisit(const T & id) {
auto it = vertices_.find(id);
if (it == vertices_.end()) return;
it->second = VertexState::VISITED;
// Marks vertex `id` as VISITED and releases its children: each child's
// depCount is decremented so visitNext() can hand it out once it reaches 0.
//
// Unknown ids are ignored, and completing an already VISITED vertex is a
// no-op so that a repeated call cannot release the children a second time.
void DAG::completeVisit(const size_t id) {
    if (id >= vertices_.size()) return;

    auto & v = vertices_[id];
    if (v.state == VertexState::VISITED) return;
    v.state = VertexState::VISITED;

    for (const auto c : v.children) {
        auto & remaining = vertices_[c].depCount;
        // depCount is unsigned; never step below zero (e.g. when reset() was
        // not called after the edges were added), which would wrap around and
        // block the child from ever being visited.
        if (remaining > 0) --remaining;
    }
}

View File

@@ -29,7 +29,7 @@ namespace daggy {
class Executor {
public:
Executor(size_t maxParallelism) : maxParallelism_(maxParallelism);
Executor(size_t maxParallelism) : maxParallelism_(maxParallelism) {}
virtual const std::string getName() const = 0;
// This will block if the executor is full

View File

@@ -4,6 +4,7 @@
#include <unordered_map>
#include <string>
#include "DAG.hpp"
#include "Executor.hpp"
namespace daggy {
@@ -11,8 +12,7 @@ namespace daggy {
public:
// Register an executor
void registerExecutor(std::shared_ptr<Executor> executor);
void runDAG(std::string dagJson);
void runDAG(DAG dag);
private:
std::unordered_map<std::string, std::shared_ptr<Executor>> executors;

View File

@@ -1,4 +1,74 @@
#include <daggy/DAG.hpp>
namespace daggy {
// Number of vertices currently held by the graph.
size_t DAG::size() const {
    const size_t vertexCount = vertices_.size();
    return vertexCount;
}
// True when the graph holds no vertices at all.
bool DAG::empty() const {
    return vertices_.size() == 0;
}
// Appends a new, unvisited vertex with no children and no pending
// dependencies.
//
// Returns the zero-based index of the new vertex, i.e. the id that the other
// DAG members (addEdge, hasPath, completeVisit, ...) index vertices_ with.
size_t DAG::addVertex() {
    vertices_.push_back(Vertex{.state = VertexState::UNVISITED, .depCount = 0});
    // The new vertex is the last element; size() itself would be one past it.
    return vertices_.size() - 1;
}
// Removes the edge from -> to.
//
// Unknown source vertices and edges that do not exist are ignored. depCount
// values are not adjusted here; reset() recomputes them from the edges.
void DAG::dropEdge(const size_t from, const size_t to) {
    // Indexing past the end of vertices_ would be undefined behaviour.
    if (from >= vertices_.size()) return;
    // erase() by key is a no-op when the edge is absent and, unlike extract(),
    // does not build a node handle only to throw it away.
    vertices_[from].children.erase(to);
}
// Adds the directed edge from -> to.
//
// Throws std::out_of_range if either endpoint is not an existing vertex, and
// std::runtime_error if the edge would close a cycle (including a self-edge).
// depCount values are not adjusted here; reset() recomputes them from the
// edges.
void DAG::addEdge(const size_t from, const size_t to) {
    if (from >= vertices_.size() || to >= vertices_.size())
        throw std::out_of_range("Edge references a vertex that does not exist");
    // hasPath() only reports paths of length >= 1, so it cannot see that
    // from -> from is itself a cycle; check that case explicitly.
    if (from == to || hasPath(to, from))
        throw std::runtime_error("Adding edge would result in a cycle");
    vertices_[from].children.insert(to);
}
// Reports whether `to` can be reached from `from` by following one or more
// edges. A vertex is therefore not considered reachable from itself unless an
// edge sequence actually leads back to it.
//
// Returns false when `from` is not an existing vertex.
bool DAG::hasPath(const size_t from, const size_t to) const {
    if (from >= vertices_.size()) return false;

    // Iterative depth-first search. Each vertex is expanded at most once, so
    // descendants shared by several branches are not walked again and again.
    std::vector<char> seen(vertices_.size(), 0);
    std::vector<size_t> pending{from};
    seen[from] = 1;

    while (!pending.empty()) {
        const size_t current = pending.back();
        pending.pop_back();

        for (const auto child : vertices_[current].children) {
            if (child == to) return true;
            // Skip dangling child ids and vertices that are already queued.
            if (child >= vertices_.size() || seen[child]) continue;
            seen[child] = 1;
            pending.push_back(child);
        }
    }
    return false;
}
// Prepares the graph for a fresh traversal: every vertex becomes UNVISITED
// and every depCount is rebuilt from the current edges.
void DAG::reset() {
    // First pass: wipe the per-vertex traversal bookkeeping.
    for (size_t idx = 0; idx < vertices_.size(); ++idx) {
        vertices_[idx].state = VertexState::UNVISITED;
        vertices_[idx].depCount = 0;
    }

    // Second pass: every outgoing edge contributes one pending dependency to
    // the vertex it points at. This must run after all counts were zeroed.
    for (const auto & parent : vertices_) {
        for (const auto childId : parent.children) {
            vertices_[childId].depCount += 1;
        }
    }
}
// True when every vertex is in the VISITED state (trivially true for an
// empty graph).
bool DAG::allVisited() const {
    bool everyVertexDone = true;
    // Stop scanning as soon as one unfinished vertex is found.
    for (auto it = vertices_.begin(); everyVertexDone && it != vertices_.end(); ++it) {
        everyVertexDone = (it->state == VertexState::VISITED);
    }
    return everyVertexDone;
}
// Hands out the lowest-indexed vertex that is still UNVISITED and has no
// pending dependencies, marking it VISITING before returning its index.
//
// Returns an empty optional when no vertex is currently ready.
std::optional<const size_t > DAG::visitNext() {
    size_t position = 0;
    for (auto & candidate : vertices_) {
        const bool ready = candidate.state == VertexState::UNVISITED
                           && candidate.depCount == 0;
        if (ready) {
            candidate.state = VertexState::VISITING;
            return position;
        }
        ++position;
    }
    return std::nullopt;
}
// Marks vertex `id` as VISITED and releases its children: each child's
// depCount is decremented so visitNext() can hand it out once it reaches 0.
//
// Unknown ids are ignored, and completing an already VISITED vertex is a
// no-op so that a repeated call cannot release the children a second time.
void DAG::completeVisit(const size_t id) {
    if (id >= vertices_.size()) return;

    auto & v = vertices_[id];
    if (v.state == VertexState::VISITED) return;
    v.state = VertexState::VISITED;

    for (const auto c : v.children) {
        auto & remaining = vertices_[c].depCount;
        // depCount is unsigned; never step below zero (e.g. when reset() was
        // not called after the edges were added), which would wrap around and
        // block the child from ever being visited.
        if (remaining > 0) --remaining;
    }
}
}

View File

@@ -5,14 +5,14 @@
#include "catch.hpp"
TEST_CASE("DAG Construction Tests", "[dag]") {
daggy::DAG<int> dag;
daggy::DAG dag;
REQUIRE(dag.size() == 0);
REQUIRE(dag.empty());
REQUIRE_NOTHROW(dag.addVertex(0));
REQUIRE_NOTHROW(dag.addVertex());
for (int i = 1; i < 10; ++i) {
dag.addVertex(i);
dag.addVertex();
dag.addEdge(i-1, i);
}
@@ -21,21 +21,14 @@ TEST_CASE("DAG Construction Tests", "[dag]") {
// Cannot add an edge that would result in a cycle
REQUIRE_THROWS(dag.addEdge(9, 5));
SECTION("Visit State") {
dag.setVisitState(daggy::VertexState::VISITING);
for (const auto v : dag.getVertices()) {
REQUIRE(dag.getVertexState(v) == daggy::VertexState::VISITING);
}
}
}
TEST_CASE("DAG Traversal Tests", "[dag]") {
daggy::DAG<int> dag;
daggy::DAG dag;
const int N_VERTICES = 10;
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(i); }
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(); }
/*
0 ---------------------\
@@ -61,7 +54,7 @@ TEST_CASE("DAG Traversal Tests", "[dag]") {
}
SECTION("Baisc Traversal") {
dag.setVisitState(daggy::VertexState::UNVISITED);
dag.reset();
std::vector<int> visitOrder(N_VERTICES);
size_t i = 0;
while (! dag.allVisited()) {