- Adding StdOutLogger and adding tests for DAG execution to verify it works.

- Roughing in FileSystemLogger.
- Deleting Scheduler code and associated unit tests as being too complicated for maintenance.
- Refactoring namespaces for loggers and executors.
2021-08-09 14:59:23 -03:00
parent 6a2c2ddb9a
commit a8e85f8feb
21 changed files with 382 additions and 217 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,8 +10,10 @@ find_package (Threads REQUIRED)

 include(cmake/rapidjson.cmake)
 include(cmake/Pistache.cmake)
+include(cmake/MagicEnum.cmake)

 include_directories(${RAPIDJSON_INCLUDE_DIR})
+include_directories(${MAGIC_ENUM_INCLUDE_DIR})

 add_subdirectory(daggy)
 add_subdirectory(tests)
--- a/cmake/MagicEnum.cmake
+++ b/cmake/MagicEnum.cmake
@@ -0,0 +1,17 @@
+include(ExternalProject)
+# Download magic_enum
+ExternalProject_Add(
+        magic-enum
+        PREFIX "third_party/magic-enum"
+        GIT_REPOSITORY "https://github.com/Neargye/magic_enum"
+        GIT_TAG "v0.7.3"
+        TIMEOUT 10
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND ""
+        INSTALL_COMMAND ""
+        UPDATE_COMMAND ""
+)
+
+# magic_enum is a header-only library
+ExternalProject_Get_Property(magic-enum source_dir)
+set(MAGIC_ENUM_INCLUDE_DIR ${source_dir}/include)
--- a/daggy/CMakeLists.txt
+++ b/daggy/CMakeLists.txt
@@ -7,4 +7,4 @@ add_library(${PROJECT_NAME} STATIC ${SOURCES})
 include_directories(${PISTACHE_INCLUDE_DIR})
 target_include_directories(${PROJECT_NAME} PUBLIC include)
 target_link_libraries(${PROJECT_NAME} pistache pthread)
-add_dependencies(${PROJECT_NAME} PistacheDownload rapidjson)
+add_dependencies(${PROJECT_NAME} PistacheDownload rapidjson magic-enum)
--- a/daggy/include/daggy/AttemptRecord.hpp
+++ b/daggy/include/daggy/AttemptRecord.hpp
@@ -10,7 +10,7 @@ namespace daggy {
        TimePoint startTime;
        TimePoint stopTime;
        int rc;       // RC from the task
-        std::string metaLog;  // Logs from the executor
+        std::string metaLog;  // Logs from the task executor
        std::string output;   // stdout from command
        std::string error;    // stderr from command
    };
--- a/daggy/include/daggy/DAGLogger.hpp
+++ b/daggy/include/daggy/DAGLogger.hpp
@@ -1,68 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include "DAGRun.hpp"
-
-/*
-   DAGLogger represents the interface to store all the state information
-   for daggy to run. Abstracted in case other back-end solutions need to
-   be supported.
-*/
-
-namespace daggy {
-    using DAGDefID = int16_t;
-    using DAGRunID = size_t;
-
-    enum class RunState : uint32_t {
-        QUEUED = 0,
-        RUNNING = 1,
-        ERRORED = 1 << 1,
-        KILLED = 1 << 2,
-        COMPLETED = 1 << 3
-    };
-
-    struct TaskUpdateRecord {
-        TimePoint time;
-        size_t taskID;
-        RunState newState;
-    };
-
-    struct DAGUpdateRecord {
-        TimePoint time;
-        RunState newState;
-    };
-
-    // Pretty heavy weight, but
-    struct DAGRunRecord {
-        std::string name;
-        std::vector<Task> tasks;
-        std::vector<RunState> runStates;
-        std::vector<std::vector<AttemptRecord>> taskAttempts;
-        std::vector<TaskUpdateRecord> taskStateChanges;
-        std::vector<DAGUpdateRecord> dagStateChanges;
-    };
-
-    struct DAGRunSummary {
-        DAGRunID runID;
-        std::string name;
-        RunState  runState;
-        TimePoint startTime;
-        TimePoint lastUpdate;
-        std::unordered_map<RunState, size_t> taskStates;
-    };
-
-    class DAGLogger {
-    public:
-        // Execution
-        virtual DAGRunID startDAGRun(std::string name, const std::vector<Task> & tasks) = 0;
-        virtual void updateDAGRunState(DAGRunID dagRunId, RunState state) = 0;
-        virtual void logTaskAttempt(DAGRunID, size_t taskID, const AttemptRecord & attempt) = 0;
-        virtual void markTaskComplete(DAGRunID dagRun, size_t taskID) = 0;
-        virtual void updateTaskState(DAGRunID dagRunId, RunState state) = 0;
-
-        // Querying
-        virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) = 0;
-        virtual DAGRunRecord getDAGRun(DAGRunID dagRunId) = 0;
-    };
-}
--- a/daggy/include/daggy/Defines.hpp
+++ b/daggy/include/daggy/Defines.hpp
@@ -19,4 +19,5 @@ namespace daggy {
    // DAG Runs
    using DAGDefID = int16_t;
    using DAGRunID = size_t;
+    using TaskID = size_t;
 }
--- a/daggy/include/daggy/TaskExecutor.hpp
+++ b/daggy/include/daggy/TaskExecutor.hpp
@@ -1,30 +0,0 @@
-#pragma once
-
-#include <chrono>
-#include <future>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include "Task.hpp"
-#include "AttemptRecord.hpp"
-#include "ThreadPool.hpp"
-
-/*
-   Executors run Tasks, returning a future with the results.
-   If there are many retries, logs are returned for each attempt.
-*/
-
-namespace daggy {
-    class TaskExecutor {
-    public:
-        TaskExecutor(size_t nThreads) : threadPool(nThreads) {};
-
-        virtual const std::string getName() const = 0;
-
-        // This will block if the executor is full
-        virtual AttemptRecord runCommand(std::vector<std::string> cmd) = 0;
-
-        ThreadPool threadPool;
-    };
-}
--- a/daggy/include/daggy/Utilities.hpp
+++ b/daggy/include/daggy/Utilities.hpp
@@ -7,19 +7,29 @@

 #include <rapidjson/document.h>

-#include "DAGLogger.hpp"
-#include "TaskExecutor.hpp"
+#include "daggy/loggers/dag_run/DAGLoggerBase.hpp"
+#include "daggy/executors/task/TaskExecutor.hpp"
 #include "Task.hpp"
 #include "Defines.hpp"
+#include "DAG.hpp"

 namespace daggy {
    std::vector<Command> expandCommands(const std::vector<std::string> & command, const ParameterValues & parameters);

+    DAG buildDAGFromTasks(const std::vector<Task> & tasks);
+
    // Blocking call
+    std::vector<AttemptRecord>
+            runTask(DAGRunID runID,
+                    TaskID taskID,
+                const Task & task,
+                executors::task::TaskExecutor & executor,
+                loggers::dag_run::DAGLoggerBase & logger);
+
    void runDAG(DAGRunID runID,
                    std::vector<Task> tasks,
-                    TaskExecutor & executor,
-                    DAGLogger & logger,
+                    executors::task::TaskExecutor & executor,
+                    loggers::dag_run::DAGLoggerBase & logger,
                    DAG dag);

 }
--- a/daggy/include/daggy/dagloggers/FileSystemLogger.hpp
+++ b/daggy/include/daggy/dagloggers/FileSystemLogger.hpp
@@ -1,59 +0,0 @@
-#pragma once
-
-#include <filesystem>
-
-#include <rapidjson/document.h>
-#include "../DAGLogger.hpp"
-
-namespace fs = std::filesystem;
-namespace rj = rapidjson;
-
-namespace daggy {
-    /*
-     * This logger should only be used for debug purposes. It's not really optimized for querying, and will
-     * use a ton of inodes to track state.
-     *
-     * On the plus side, it's trivial to look at without using the API.
-     *
-     * Filesystem logger creates the following structure:
-     *   {root}/
-     *       current/
-     *          {DAGRunID}.{STATE} -- A file for each DAG not in a COMPLETE state for faster lookups
-     *       runs/
-     *         {runID}/
-     *            meta.json   --- Contains the DAG name, task definitions
-     *            {taskID}/
-     *                states  --- State changes
-     *                {attempt}/
-     *                    meta.json --- timestamps and rc
-     *                    stdout
-     *                    stderr
-     *                    execlog
-     */
-    class FileSystemLogger : DAGLogger {
-    public:
-        FileSystemLogger(fs::path root);
-
-        // Execution
-        virtual DAGRunID startDAGRun(std::string name, const std::vector<Task> & tasks) override;
-        virtual void updateDAGRunState(DAGRunID dagRunId, RunState state) override;
-        virtual void logTaskAttempt(DAGRunID, size_t taskID, const AttemptRecord & attempt) override;
-        virtual void markTaskComplete(DAGRunID dagRun, size_t taskID) override;
-        virtual void updateTaskState(DAGRunID dagRunId, RunState state) override;
-
-        // Querying
-        virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
-        virtual DAGRunRecord getDAGRun(DAGRunID dagRunId);
-
-    private:
-        fs::path root_;
-        std::atomic<DAGRunID> nextRunID_;
-        std::mutex lock_;
-
-        std::unordered_map<fs::path, std::mutex> runLocks;
-
-        inline const fs::path getCurrentPath() const;
-        inline const fs::path getRunsRoot() const;
-        inline const fs::path getRunRoot(DAGRunID runID) const;
-    };
-}
--- a/daggy/include/daggy/executors/ForkingTaskExecutor.hpp
+++ b/daggy/include/daggy/executors/ForkingTaskExecutor.hpp
@@ -1,18 +0,0 @@
-#pragma once
-
-#include "../TaskExecutor.hpp"
-
-namespace daggy {
-    namespace executor {
-        class ForkingTaskExecutor : public TaskExecutor {
-        public:
-            ForkingTaskExecutor(size_t nThreads)
-                : TaskExecutor(nThreads)
-                {}
-
-            const std::string getName() const override { return "ForkingTaskExecutor"; }
-
-            AttemptRecord runCommand(std::vector<std::string> cmd) override;
-        };
-    }
-}
--- a/daggy/include/daggy/executors/task/ForkingTaskExecutor.hpp
+++ b/daggy/include/daggy/executors/task/ForkingTaskExecutor.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "TaskExecutor.hpp"
+
+namespace daggy {
+    namespace executors {
+        namespace task {
+            class ForkingTaskExecutor : public TaskExecutor {  // overrides both pure virtuals of TaskExecutor
+            public:
+                explicit ForkingTaskExecutor(size_t nThreads)  // explicit: no implicit size_t -> executor conversion
+                : TaskExecutor(nThreads)
+                {}
+
+                const std::string getName() const override { return "ForkingTaskExecutor"; }
+
+                AttemptRecord runCommand(std::vector<std::string> cmd) override;
+            };
+        }
+    }
+}
--- a/daggy/include/daggy/executors/task/TaskExecutor.hpp
+++ b/daggy/include/daggy/executors/task/TaskExecutor.hpp
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <chrono>
+#include <future>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "daggy/Task.hpp"
+#include "daggy/AttemptRecord.hpp"
+#include "daggy/ThreadPool.hpp"
+
+/*
+   Executors run the command of a Task and report the outcome of that
+   attempt as an AttemptRecord. runCommand() covers a single attempt.
+*/
+
+namespace daggy {
+    namespace executors {
+        namespace task {
+            class TaskExecutor {
+            public:
+                // nThreads is forwarded to the ThreadPool constructor.
+                explicit TaskExecutor(size_t nThreads) : threadPool(nThreads) {}
+
+                // Executors are used through TaskExecutor references, so the base needs a virtual destructor.
+                virtual ~TaskExecutor() = default;
+
+                virtual const std::string getName() const = 0;
+
+                // This will block if the executor is full
+                virtual AttemptRecord runCommand(std::vector<std::string> cmd) = 0;
+
+                ThreadPool threadPool;
+            };
+        }
+    }
+}
--- a/daggy/include/daggy/loggers/dag_run/DAGLoggerBase.hpp
+++ b/daggy/include/daggy/loggers/dag_run/DAGLoggerBase.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "daggy/DAGRun.hpp"
+
+/*
+   DAGLoggerBase represents the interface to store all the state information
+   for daggy to run. Abstracted in case other back-end solutions need to
+   be supported.
+*/
+
+namespace daggy {
+    namespace loggers {
+        namespace dag_run {
+            // State of a DAG run or of a single task. Apart from QUEUED each value
+            // is a distinct bit, presumably so states can be OR-ed into the
+            // stateMask taken by DAGLoggerBase::getDAGs().
+            // NOTE(review): QUEUED is 0, so no bit mask can select it -- confirm
+            // how queued runs are meant to be queried.
+            enum class RunState : uint32_t {
+                QUEUED = 0,
+                RUNNING = 1,
+                RETRY   = 1 << 1,
+                ERRORED = 1 << 2,
+                KILLED = 1 << 3,
+                COMPLETED = 1 << 4
+            };
+
+            // One task state transition.
+            struct TaskUpdateRecord {
+                TimePoint time;
+                TaskID taskID;
+                RunState newState;
+            };
+
+            // One DAG run state transition.
+            struct DAGUpdateRecord {
+                TimePoint time;
+                RunState newState;
+            };
+
+            // Full record of one DAG run. Pretty heavy weight: it carries the task
+            // definitions together with their attempts and state changes.
+            struct DAGRunRecord {
+                std::string name;
+                std::vector<Task> tasks;
+                std::vector<RunState> runStates;
+                std::vector<std::vector<AttemptRecord>> taskAttempts;
+                std::vector<TaskUpdateRecord> taskStateChanges;
+                std::vector<DAGUpdateRecord> dagStateChanges;
+            };
+
+            // Condensed view of a run, as returned by getDAGs().
+            struct DAGRunSummary {
+                DAGRunID runID;
+                std::string name;
+                RunState  runState;
+                TimePoint startTime;
+                TimePoint lastUpdate;
+                std::unordered_map<RunState, size_t> taskStateCounts;
+            };
+
+            class DAGLoggerBase {
+            public:
+                // Loggers are used through DAGLoggerBase references; a virtual
+                // destructor keeps destruction through the base well defined.
+                virtual ~DAGLoggerBase() = default;
+
+                // Execution
+                virtual DAGRunID startDAGRun(std::string name, const std::vector<Task> & tasks) = 0;
+                virtual void updateDAGRunState(DAGRunID dagRunId, RunState state) = 0;
+                virtual void logTaskAttempt(DAGRunID, TaskID taskID, const AttemptRecord & attempt) = 0;
+                virtual void updateTaskState(DAGRunID dagRunId, TaskID taskID, RunState state) = 0;
+
+                // Querying
+                virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) = 0;
+                virtual DAGRunRecord getDAGRun(DAGRunID dagRunId) = 0;
+            };
+        }
+    }
+}
--- a/daggy/include/daggy/loggers/dag_run/FileSystemLogger.hpp
+++ b/daggy/include/daggy/loggers/dag_run/FileSystemLogger.hpp
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <filesystem>
+#include <atomic>
+#include <mutex>
+
+#include <rapidjson/document.h>
+#include "DAGLoggerBase.hpp"
+
+namespace fs = std::filesystem;
+namespace rj = rapidjson;
+
+namespace daggy {
+    namespace loggers {
+        namespace dag_run {
+            /*
+             * This logger should only be used for debug purposes. It's not really optimized for querying, and will
+             * use a ton of inodes to track state.
+             *
+             * On the plus side, it's trivial to look at without using the API.
+             *
+             * Filesystem logger creates the following structure:
+             *   {root}/
+             *       current/
+             *          {DAGRunID}.{STATE} -- A file for each DAG not in a COMPLETE state for faster lookups
+             *       runs/
+             *         {runID}/
+             *            meta.json   --- Contains the DAG name, task definitions
+             *            {taskID}/
+             *                states  --- State changes
+             *                {attempt}/
+             *                    meta.json --- timestamps and rc
+             *                    stdout
+             *                    stderr
+             *                    execlog
+             */
+            class FileSystemLogger : public DAGLoggerBase {  // public: usable wherever a DAGLoggerBase & is expected
+            public:
+                explicit FileSystemLogger(fs::path root);  // explicit: a path must not silently convert to a logger
+
+                // Execution
+                virtual DAGRunID startDAGRun(std::string name, const std::vector<Task> & tasks) override;
+                virtual void updateDAGRunState(DAGRunID dagRunId, RunState state) override;
+                virtual void logTaskAttempt(DAGRunID, TaskID taskID, const AttemptRecord & attempt) override;
+                virtual void updateTaskState(DAGRunID dagRunId, TaskID taskID, RunState state) override;
+
+                // Querying
+                virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
+                virtual DAGRunRecord getDAGRun(DAGRunID dagRunId) override;
+
+            private:
+                fs::path root_;                    // {root} of the directory layout described above
+                std::atomic<DAGRunID> nextRunID_;  // seeded by the constructor from the entries under runs/
+                std::mutex lock_;
+
+                // std::unordered_map<fs::path, std::mutex> runLocks;
+
+                inline const fs::path getCurrentPath() const;
+                inline const fs::path getRunsRoot() const;
+                inline const fs::path getRunRoot(DAGRunID runID) const;
+            };
+        }
+    }
+}
--- a/daggy/include/daggy/loggers/dag_run/StdOutLogger.hpp
+++ b/daggy/include/daggy/loggers/dag_run/StdOutLogger.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <iostream>
+#include <mutex>
+
+#include "DAGLoggerBase.hpp"
+
+namespace daggy {
+    namespace loggers {
+        namespace dag_run {
+            /*
+             * This logger should only be used for debug purposes. It doesn't actually log anything, just prints stuff
+             * to stdout.
+             */
+            class StdOutLogger : public DAGLoggerBase {
+            public:
+                StdOutLogger();
+
+                // Execution
+                virtual DAGRunID startDAGRun(std::string name, const std::vector<Task> & tasks) override;
+                virtual void updateDAGRunState(DAGRunID dagRunId, RunState state) override;
+                virtual void logTaskAttempt(DAGRunID, TaskID taskID, const AttemptRecord & attempt) override;
+                virtual void updateTaskState(DAGRunID dagRunId, TaskID taskID, RunState state) override;
+
+                // Querying
+                virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
+                virtual DAGRunRecord getDAGRun(DAGRunID dagRunId) override;
+
+            private:
+                DAGRunID nextRunID_;  // ID returned by the next startDAGRun() call
+                std::mutex guard_;    // locked by every logging method before it writes to std::cout
+            };
+        }
+    }
+}
--- a/daggy/src/Utilities.cpp
+++ b/daggy/src/Utilities.cpp
@@ -30,11 +30,64 @@ namespace daggy {
        return commands;
    }

+    // Build a DAG with one vertex per task (in input order) and an edge from
+    // each task to every task named in its `children`.
+    // Throws std::out_of_range if a child names a task that is not in `tasks`.
+    DAG buildDAGFromTasks(const std::vector<Task> & tasks) {
+        DAG dag;
+        std::unordered_map<std::string, size_t> taskIDs;
+
+        // Add all the vertices
+        for (const auto &task : tasks) {
+            taskIDs[task.name] = dag.addVertex();
+        }
+
+        // Add edges
+        for (size_t i = 0; i < tasks.size(); ++i) {
+            for (const auto &c : tasks[i].children) {
+                // at(), not operator[]: a child naming an unknown task must fail
+                // loudly instead of being default-inserted and wired to vertex 0.
+                // NOTE(review): the edge source is the task index, which assumes
+                // addVertex() hands out 0, 1, 2, ... in call order -- confirm.
+                dag.addEdge(i, taskIDs.at(c));
+            }
+        }
+        dag.reset();
+        return dag;
+    }
+
+    // Run a single task, retrying failed attempts.
+    // The command is executed at most task.maxRetries + 1 times and stops at the
+    // first attempt whose rc is 0. Every attempt is handed to logger.logTaskAttempt().
+    // Returns the attempts in execution order.
+    std::vector<AttemptRecord> runTask(DAGRunID runID,
+                    TaskID taskID,
+                    const Task & task,
+                    executors::task::TaskExecutor & executor,
+                    loggers::dag_run::DAGLoggerBase & logger)
+                    {
+        std::vector<AttemptRecord> attempts;
+        const auto maxAttempts = task.maxRetries + 1;
+        logger.updateTaskState(runID, taskID, loggers::dag_run::RunState::RUNNING );
+
+        while (attempts.size() < maxAttempts) {
+            attempts.push_back(executor.runCommand(task.command));
+            logger.logTaskAttempt(runID, taskID, attempts.back());
+            if (attempts.back().rc == 0) break;
+            // Only report RETRY when another attempt will actually follow.
+            if (attempts.size() < maxAttempts) {
+                logger.updateTaskState(runID, taskID, loggers::dag_run::RunState::RETRY );
+            }
+        }
+        return attempts;
+    }
+
    void runDAG(DAGRunID runID,
                std::vector<Task> tasks,
-                TaskExecutor & executor,
-                DAGLogger & logger,
+                executors::task::TaskExecutor & executor,
+                loggers::dag_run::DAGLoggerBase & logger,
                DAG dag) {
+        logger.updateDAGRunState(runID, loggers::dag_run::RunState::RUNNING);

        struct TaskState {
            size_t tid;
@@ -51,10 +89,17 @@ namespace daggy {

                if (taskState.fut.valid()) {
                    auto attemptRecords = taskState.fut.get();
-                    if (attemptRecords.back().rc == 0) {
-                        dag.completeVisit(taskState.tid);
+                    if (attemptRecords.empty()) {
+                        logger.updateTaskState(runID, taskState.tid, loggers::dag_run::RunState::ERRORED );
+                        continue;
+                    }
+                    if (attemptRecords.back().rc == 0) {
+                        logger.updateTaskState(runID, taskState.tid, loggers::dag_run::RunState::COMPLETED );
+                        dag.completeVisit(taskState.tid);
+                        taskState.complete = true;
+                    } else {
+                        logger.updateTaskState(runID, taskState.tid, loggers::dag_run::RunState::ERRORED );
                    }
-                    taskState.complete = true;
                }
            }

@@ -66,21 +111,11 @@ namespace daggy {
                auto tid = t.value();
                TaskState tsk{
                    .tid = tid,
-                    .fut = tq->addTask(
-                            [tid, &tasks, &executor]() {
-                                std::vector<AttemptRecord> attempts;
-
-                                while (attempts.size() < tasks[tid].maxRetries) {
-                                    attempts.push_back(executor.runCommand(tasks[tid].command));
-                                    if (attempts.back().rc == 0) break;
-                                }
-                                return attempts;
-                            })
-                                    , .complete = false
+                    .fut = tq->addTask([tid, runID, &tasks, &executor, &logger]() {return runTask(runID, tid, tasks[tid], executor, logger);}),
+                    .complete = false
                };
                taskStates.push_back(std::move(tsk));

-                //
                auto nextTask = dag.visitNext();
                if (not nextTask.has_value()) break;
                t.emplace(nextTask.value());
--- a/daggy/src/dagloggers/FileSystemLogger.cpp
+++ b/daggy/src/dagloggers/FileSystemLogger.cpp
@@ -1,28 +1,36 @@
-#include <daggy/dagloggers/FileSystemLogger.hpp>
+#include <daggy/loggers/dag_run/FileSystemLogger.hpp>

 namespace fs = std::filesystem;

+using namespace daggy::loggers::dag_run;
+
 namespace daggy {
    inline const fs::path FileSystemLogger::getCurrentPath() const { return root_ / "current"; }
    inline const fs::path FileSystemLogger::getRunsRoot() const { return root_ / "runs"; }
-    inline const fs::path getRunRoot(DAGRunID runID) const { return getRunsRoot() / std::to_string(runID); }
+    inline const fs::path FileSystemLogger::getRunRoot(DAGRunID runID) const { return getRunsRoot() / std::to_string(runID); }

-    FileSystemLogger(fs::path root)
+    // Creates the root, "current" and "runs" directories when missing, then
+    // seeds nextRunID_ with one more than the largest numeric entry in "runs".
+    FileSystemLogger::FileSystemLogger(fs::path root)
    : root_(root)
    , nextRunID_(0)
    {
-        const std::vector<fs::paths> reqPaths{ root_, getCurrentPath(), getRunsRoot()};
+        const std::vector<fs::path> reqPaths{ root_, getCurrentPath(), getRunsRoot()};
        for (const auto & path : reqPaths) {
            if (! fs::exists(path)) { fs::create_directory(path); }
        }

        // Get the next run ID
        size_t runID = 0;
-        for (auto & dir : fs::std::filesystem::directory_iterator(getRunsRoot())) {
+        for (auto & dir : fs::directory_iterator(getRunsRoot())) {
            try {
-                runID = std::stoull(dir.stem());
+                runID = std::stoull(dir.path().stem());
-                if (runID > nextRunID_) nextRunID_ = runID + 1;
+                // >= so that an existing run 0 (or the current maximum) is never reused
+                if (runID >= nextRunID_) nextRunID_ = runID + 1;
-            } catch {}
+            } catch (const std::exception &) {
+                // Entry name is not a run ID that std::stoull can parse; skip it
+                continue;
+            }
        }
    }

@@ -32,14 +36,13 @@ namespace daggy {

        // TODO make this threadsafe
        fs::path runDir = getRunRoot(runID);
-        std::lock_guard<std::mutex> guard(runLocks[runDir]);
+        // std::lock_guard<std::mutex> guard(runLocks[runDir]);

        // Init the directory
    }
-    void FileSystemLogger::updateDAGRunState(DAGRunID dagRunId, RunState state){ }
+    void FileSystemLogger::updateDAGRunState(DAGRunID dagRunID, RunState state){ }
    void FileSystemLogger::logTaskAttempt(DAGRunID, size_t taskID, const AttemptRecord & attempt){ }
-    void FileSystemLogger::markTaskComplete(DAGRunID dagRun, size_t taskID){ }
-    void FileSystemLogger::updateTaskState(DAGRunID dagRunId, RunState state){ }
+    void FileSystemLogger::updateTaskState(DAGRunID dagRunId, TaskID taskID, RunState state){ }

    // Querying
    std::vector<DAGRunSummary> FileSystemLogger::getDAGs(uint32_t stateMask){ }
--- a/daggy/src/dagloggers/StdOutLogger.cpp
+++ b/daggy/src/dagloggers/StdOutLogger.cpp
@@ -0,0 +1,41 @@
+#include <magic_enum.hpp>
+
+#include <daggy/loggers/dag_run/StdOutLogger.hpp>
+
+namespace daggy {
+    namespace loggers {
+        namespace dag_run {
+            StdOutLogger::StdOutLogger() : nextRunID_(0) { }
+
+            // Execution
+            // Hands out sequential run IDs starting at 0 and announces the run on stdout.
+            DAGRunID StdOutLogger::startDAGRun(std::string name, const std::vector<Task> & tasks) {
+                std::lock_guard<std::mutex> lock(guard_);
+                const DAGRunID runID = nextRunID_++;
+                std::cout << "Starting new DAGRun named " << name << " with ID " << runID << " and " << tasks.size() << " tasks" << std::endl;
+                return runID;
+            }
+
+            void StdOutLogger::updateDAGRunState(DAGRunID dagRunID, RunState state){
+                std::lock_guard<std::mutex> lock(guard_);
+                std::cout << "DAG State Change(" << dagRunID << "): " << magic_enum::enum_name(state) << std::endl;
+            }
+
+            void StdOutLogger::logTaskAttempt(DAGRunID dagRunID, TaskID taskID, const AttemptRecord & attempt){
+                std::lock_guard<std::mutex> lock(guard_);
+                // Show stdout for a successful attempt, stderr otherwise
+                const std::string & msg = attempt.rc == 0 ? attempt.output : attempt.error;
+                std::cout << "Task Attempt (" << dagRunID << '/' << taskID << "): Ran with RC " << attempt.rc << ": " << msg << std::endl;
+            }
+
+            void StdOutLogger::updateTaskState(DAGRunID dagRunID, TaskID taskID, RunState state) {
+                std::lock_guard<std::mutex> lock(guard_);
+                std::cout << "Task State Change (" << dagRunID << '/' << taskID << "): " << magic_enum::enum_name(state) << std::endl;
+            }
+
+            // Querying -- nothing is recorded, so both queries come back empty
+            std::vector<DAGRunSummary> StdOutLogger::getDAGs(uint32_t /*stateMask*/){ return {}; }
+            DAGRunRecord StdOutLogger::getDAGRun(DAGRunID /*dagRunId*/) { return {}; }
+        }
+    }
+}
--- a/daggy/src/executors/ForkingTaskExecutor.cpp
+++ b/daggy/src/executors/ForkingTaskExecutor.cpp
@@ -1,14 +1,11 @@
-#include <daggy/executors/ForkingTaskExecutor.hpp>
-
-#include <array>
-#include <utility>
+#include <daggy/executors/task/ForkingTaskExecutor.hpp>

 #include <fcntl.h>
 #include <unistd.h>
 #include <wait.h>
 #include <poll.h>

-using namespace daggy::executor;
+using namespace daggy::executors::task;

 std::string slurp(int fd) {
    std::string result;
--- a/tests/unit_executor_forkingexecutor.cpp
+++ b/tests/unit_executor_forkingexecutor.cpp
@@ -1,12 +1,12 @@
 #include <iostream>
 #include <filesystem>

-#include "daggy/executors/ForkingTaskExecutor.hpp"
+#include "daggy/executors/task/ForkingTaskExecutor.hpp"

 #include "catch.hpp"

 TEST_CASE("Basic Execution", "[forking_executor]") {
-  daggy::executor::ForkingTaskExecutor ex(10);
+  daggy::executors::task::ForkingTaskExecutor ex(10);

  SECTION("Simple Run") {
    std::vector<std::string> cmd{"/usr/bin/echo", "abc", "123"};
--- a/tests/unit_utilities.cpp
+++ b/tests/unit_utilities.cpp
@@ -6,9 +6,11 @@

 #include "daggy/Utilities.hpp"
 #include "daggy/Serialization.hpp"
+#include "daggy/executors/task/ForkingTaskExecutor.hpp"
+#include "daggy/loggers/dag_run/StdOutLogger.hpp"

 TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
-    SECTION("Basic Parse") {
+    SECTION("Basic expansion") {
        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
        auto params = daggy::parametersFromJSON(testParams);
        std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"};
@@ -26,4 +28,18 @@ TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
        // TYPE isn't used, so it's just |DATE| * |SOURCE|
        REQUIRE(allCommands.size() == 2);
    }
+}
+
+// Runs a three-task DAG (A -> C, B -> C) through runDAG with ForkingTaskExecutor and StdOutLogger.
+TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
+    daggy::executors::task::ForkingTaskExecutor ex(10);
+    daggy::loggers::dag_run::StdOutLogger logger;
+
+    std::string taskJSON = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["C"]}, {"name": "B", "command": ["/bin/echo", "B"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
+    auto tasks = daggy::tasksFromJSON(taskJSON);
+    REQUIRE(tasks.size() == 3);
+    auto dag = daggy::buildDAGFromTasks(tasks);
+
+    auto runID = logger.startDAGRun("test_run", tasks);
+    REQUIRE_NOTHROW(daggy::runDAG(runID, tasks, ex, logger, dag));
 }