Squashed commit of the following:
commit 73994327de890590eede353c8131f3f7c1e8aaa3 Author: Ian Roddis <gitlab@ie2r.com> Date: Wed Aug 25 13:38:29 2021 -0300 - Fixing up checks for individual dag runs commit f20e3a3dec8c063111cf60f2bec2b8f84c8a4100 Author: Ian Roddis <gitlab@ie2r.com> Date: Wed Aug 25 10:49:43 2021 -0300 - Finishing serialization of DAGRun - Checkpointing work. commit b490abadf93e3085e4204003de7eaa8183b4e1d5 Author: Ian Roddis <gitlab@ie2r.com> Date: Wed Aug 25 10:34:08 2021 -0300 - Consolidating struct definitions into Defines.hpp - Renaming DAGRunRecord member runStates to taskRunStates commit 050346ec1fd10d1091f261905c6175ffe0bcf001 Author: Ian Roddis <gitlab@ie2r.com> Date: Wed Aug 25 09:27:05 2021 -0300 - Adding additional tests for server endpoints
This commit is contained in:
@@ -37,6 +37,12 @@ and [postgres](https://postgresql.org).
|
|||||||
Building
|
Building
|
||||||
==
|
==
|
||||||
|
|
||||||
|
**Requirements:**
|
||||||
|
|
||||||
|
- git
|
||||||
|
- cmake >= 3.14
|
||||||
|
- gcc >= 9
|
||||||
|
|
||||||
```
|
```
|
||||||
git clone https://gitlab.com/iroddis/daggy
|
git clone https://gitlab.com/iroddis/daggy
|
||||||
cd daggy
|
cd daggy
|
||||||
|
|||||||
10
TODO.md
10
TODO.md
@@ -6,8 +6,10 @@ Tasks
|
|||||||
- [ ] Add in authorization scheme (maybe PAM auth endpoint with JWT?)
|
- [ ] Add in authorization scheme (maybe PAM auth endpoint with JWT?)
|
||||||
- [ ] Flesh out server and interface
|
- [ ] Flesh out server and interface
|
||||||
- Core Functionality
|
- Core Functionality
|
||||||
- [ ] Handle return on errored DAG / Task
|
- Allow `{{,!DATES}}` style interpolation, where values from `{{DATES}}` are preserved as a single, comma
|
||||||
- [ ] Clearing a DAG Task
|
separated list
|
||||||
|
- Allow for tasks to define next tasks
|
||||||
|
- Add execution gates
|
||||||
- Executors
|
- Executors
|
||||||
- [ ] Slurm Executor
|
- [ ] Slurm Executor
|
||||||
- Loggers
|
- Loggers
|
||||||
@@ -22,4 +24,6 @@ Tasks
|
|||||||
- [X] Add ability to define child -> parent relationships
|
- [X] Add ability to define child -> parent relationships
|
||||||
- [X] Rip apart scheduler and re-implement runDag as a utility function, taking an execution pool, task list,
|
- [X] Rip apart scheduler and re-implement runDag as a utility function, taking an execution pool, task list,
|
||||||
and logger
|
and logger
|
||||||
- [X] Resume a failed DAG
|
- [X] Resume a failed DAG
|
||||||
|
- [X] Handle return on errored DAG / Task
|
||||||
|
- [X] Clearing a DAG Task
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "Defines.hpp"
|
|
||||||
|
|
||||||
namespace daggy {
|
|
||||||
struct AttemptRecord {
|
|
||||||
TimePoint startTime;
|
|
||||||
TimePoint stopTime;
|
|
||||||
int rc; // RC from the task
|
|
||||||
std::string executorLog; // Logs from the dag_executor
|
|
||||||
std::string outputLog; // stdout from command
|
|
||||||
std::string errorLog; // stderr from command
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <variant>
|
|
||||||
|
|
||||||
#include "DAG.hpp"
|
|
||||||
#include "Task.hpp"
|
|
||||||
#include "AttemptRecord.hpp"
|
|
||||||
|
|
||||||
namespace daggy {
|
|
||||||
using ParameterValue = std::variant<std::string, std::vector<std::string>>;
|
|
||||||
using TaskRun = std::vector<AttemptRecord>;
|
|
||||||
|
|
||||||
struct DAGRun {
|
|
||||||
std::vector<Task> tasks;
|
|
||||||
std::unordered_map<std::string, ParameterValue> parameters;
|
|
||||||
DAG dag;
|
|
||||||
std::vector<TaskRun> taskRuns;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -29,4 +30,29 @@ namespace daggy {
|
|||||||
KILLED = 1 << 3,
|
KILLED = 1 << 3,
|
||||||
COMPLETED = 1 << 4
|
COMPLETED = 1 << 4
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Task {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> command;
|
||||||
|
uint32_t maxRetries;
|
||||||
|
uint32_t retryIntervalSeconds; // Time to wait between retries
|
||||||
|
std::unordered_set<std::string> children;
|
||||||
|
|
||||||
|
bool operator==(const Task &other) const {
|
||||||
|
return (name == other.name)
|
||||||
|
and (maxRetries == other.maxRetries)
|
||||||
|
and (retryIntervalSeconds == other.retryIntervalSeconds)
|
||||||
|
and (command == other.command)
|
||||||
|
and (children == other.children);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AttemptRecord {
|
||||||
|
TimePoint startTime;
|
||||||
|
TimePoint stopTime;
|
||||||
|
int rc; // RC from the task
|
||||||
|
std::string executorLog; // Logs from the dag_executor
|
||||||
|
std::string outputLog; // stdout from command
|
||||||
|
std::string errorLog; // stderr from command
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,8 +8,6 @@
|
|||||||
#include <rapidjson/document.h>
|
#include <rapidjson/document.h>
|
||||||
|
|
||||||
#include "Defines.hpp"
|
#include "Defines.hpp"
|
||||||
#include "Task.hpp"
|
|
||||||
#include "AttemptRecord.hpp"
|
|
||||||
|
|
||||||
namespace rj = rapidjson;
|
namespace rj = rapidjson;
|
||||||
|
|
||||||
|
|||||||
@@ -1,24 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
namespace daggy {
|
|
||||||
struct Task {
|
|
||||||
std::string name;
|
|
||||||
std::vector<std::string> command;
|
|
||||||
uint32_t maxRetries;
|
|
||||||
uint32_t retryIntervalSeconds; // Time to wait between retries
|
|
||||||
std::unordered_set<std::string> children;
|
|
||||||
|
|
||||||
bool operator==(const Task &other) const {
|
|
||||||
return (name == other.name)
|
|
||||||
and (maxRetries == other.maxRetries)
|
|
||||||
and (retryIntervalSeconds == other.retryIntervalSeconds)
|
|
||||||
and (command == other.command)
|
|
||||||
and (children == other.children);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -9,7 +9,6 @@
|
|||||||
|
|
||||||
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
|
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
|
||||||
#include "daggy/executors/task/TaskExecutor.hpp"
|
#include "daggy/executors/task/TaskExecutor.hpp"
|
||||||
#include "Task.hpp"
|
|
||||||
#include "Defines.hpp"
|
#include "Defines.hpp"
|
||||||
#include "DAG.hpp"
|
#include "DAG.hpp"
|
||||||
|
|
||||||
|
|||||||
@@ -16,4 +16,4 @@ namespace daggy {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,8 +6,7 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "daggy/Task.hpp"
|
#include "daggy/Defines.hpp"
|
||||||
#include "daggy/AttemptRecord.hpp"
|
|
||||||
#include "daggy/ThreadPool.hpp"
|
#include "daggy/ThreadPool.hpp"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -2,8 +2,6 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "../../Task.hpp"
|
|
||||||
#include "../../AttemptRecord.hpp"
|
|
||||||
#include "../../Defines.hpp"
|
#include "../../Defines.hpp"
|
||||||
#include "Defines.hpp"
|
#include "Defines.hpp"
|
||||||
|
|
||||||
|
|||||||
@@ -1,37 +1,39 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
namespace daggy {
|
#include <cstdint>
|
||||||
namespace loggers {
|
#include <string>
|
||||||
namespace dag_run {
|
#include <vector>
|
||||||
struct TaskUpdateRecord {
|
#include <unordered_set>
|
||||||
TimePoint time;
|
|
||||||
TaskID taskID;
|
|
||||||
RunState newState;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct DAGUpdateRecord {
|
|
||||||
TimePoint time;
|
|
||||||
RunState newState;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Pretty heavy weight, but
|
namespace daggy::loggers::dag_run {
|
||||||
struct DAGRunRecord {
|
struct TaskUpdateRecord {
|
||||||
std::string name;
|
TimePoint time;
|
||||||
std::vector<Task> tasks;
|
TaskID taskID;
|
||||||
std::vector<RunState> runStates;
|
RunState newState;
|
||||||
std::vector<std::vector<AttemptRecord>> taskAttempts;
|
};
|
||||||
std::vector<TaskUpdateRecord> taskStateChanges;
|
|
||||||
std::vector<DAGUpdateRecord> dagStateChanges;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct DAGRunSummary {
|
struct DAGUpdateRecord {
|
||||||
DAGRunID runID;
|
TimePoint time;
|
||||||
std::string name;
|
RunState newState;
|
||||||
RunState runState;
|
};
|
||||||
TimePoint startTime;
|
|
||||||
TimePoint lastUpdate;
|
// Pretty heavy weight, but
|
||||||
std::unordered_map<RunState, size_t> taskStateCounts;
|
struct DAGRunRecord {
|
||||||
};
|
std::string name;
|
||||||
}
|
std::vector<Task> tasks;
|
||||||
}
|
std::vector<RunState> taskRunStates;
|
||||||
}
|
std::vector<std::vector<AttemptRecord>> taskAttempts;
|
||||||
|
std::vector<TaskUpdateRecord> taskStateChanges;
|
||||||
|
std::vector<DAGUpdateRecord> dagStateChanges;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DAGRunSummary {
|
||||||
|
DAGRunID runID;
|
||||||
|
std::string name;
|
||||||
|
RunState runState;
|
||||||
|
TimePoint startTime;
|
||||||
|
TimePoint lastUpdate;
|
||||||
|
std::unordered_map<RunState, size_t> taskStateCounts;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,65 +6,62 @@
|
|||||||
|
|
||||||
#include <rapidjson/document.h>
|
#include <rapidjson/document.h>
|
||||||
#include "DAGRunLogger.hpp"
|
#include "DAGRunLogger.hpp"
|
||||||
|
#include "Defines.hpp"
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
namespace rj = rapidjson;
|
namespace rj = rapidjson;
|
||||||
|
|
||||||
namespace daggy {
|
namespace daggy::loggers::dag_run {
|
||||||
namespace loggers {
|
/*
|
||||||
namespace dag_run {
|
* This logger should only be used for debug purposes. It's not really optimized for querying, and will
|
||||||
/*
|
* use a ton of inodes to track state.
|
||||||
* This logger should only be used for debug purposes. It's not really optimized for querying, and will
|
*
|
||||||
* use a ton of inodes to track state.
|
* On the plus side, it's trivial to look at without using the API.
|
||||||
*
|
*
|
||||||
* On the plus side, it's trivial to look at without using the API.
|
* Filesystem logger creates the following structure:
|
||||||
*
|
* {root}/
|
||||||
* Filesystem logger creates the following structure:
|
* runs/
|
||||||
* {root}/
|
* {runID}/
|
||||||
* runs/
|
* meta.json --- Contains the DAG name, task definitions
|
||||||
* {runID}/
|
* states.csv --- DAG state changes
|
||||||
* meta.json --- Contains the DAG name, task definitions
|
* {taskName}/
|
||||||
* states.csv --- DAG state changes
|
* states.csv --- TASK state changes
|
||||||
* {taskName}/
|
* {attempt}/
|
||||||
* states.csv --- TASK state changes
|
* metadata.json --- timestamps and rc
|
||||||
* {attempt}/
|
* output.log
|
||||||
* metadata.json --- timestamps and rc
|
* error.log
|
||||||
* output.log
|
* executor.log
|
||||||
* error.log
|
*/
|
||||||
* executor.log
|
class FileSystemLogger : public DAGRunLogger {
|
||||||
*/
|
public:
|
||||||
class FileSystemLogger : public DAGRunLogger {
|
FileSystemLogger(fs::path root);
|
||||||
public:
|
|
||||||
FileSystemLogger(fs::path root);
|
|
||||||
|
|
||||||
// Execution
|
// Execution
|
||||||
DAGRunID startDAGRun(std::string name, const std::vector<Task> &tasks) override;
|
DAGRunID startDAGRun(std::string name, const std::vector<Task> &tasks) override;
|
||||||
|
|
||||||
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
|
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override;
|
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override;
|
||||||
|
|
||||||
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override;
|
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override;
|
||||||
|
|
||||||
// Querying
|
// Querying
|
||||||
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
|
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
|
||||||
|
|
||||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
fs::path root_;
|
fs::path root_;
|
||||||
std::atomic<DAGRunID> nextRunID_;
|
std::atomic<DAGRunID> nextRunID_;
|
||||||
std::mutex lock_;
|
std::mutex lock_;
|
||||||
|
|
||||||
// std::unordered_map<fs::path, std::mutex> runLocks;
|
// std::unordered_map<fs::path, std::mutex> runLocks;
|
||||||
|
|
||||||
inline const fs::path getCurrentPath() const;
|
inline const fs::path getCurrentPath() const;
|
||||||
|
|
||||||
inline const fs::path getRunsRoot() const;
|
inline const fs::path getRunsRoot() const;
|
||||||
|
|
||||||
inline const fs::path getRunRoot(DAGRunID runID) const;
|
inline const fs::path getRunRoot(DAGRunID runID) const;
|
||||||
};
|
};
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
#include "DAGRunLogger.hpp"
|
#include "DAGRunLogger.hpp"
|
||||||
|
#include "Defines.hpp"
|
||||||
|
|
||||||
namespace daggy {
|
namespace daggy {
|
||||||
namespace loggers {
|
namespace loggers {
|
||||||
@@ -38,4 +39,4 @@ namespace daggy {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -228,4 +228,4 @@ namespace daggy {
|
|||||||
ss >> std::get_time(&dt, "%Y-%m-%d %H:%M:%S %Z");
|
ss >> std::get_time(&dt, "%Y-%m-%d %H:%M:%S %Z");
|
||||||
return Clock::from_time_t(mktime(&dt));
|
return Clock::from_time_t(mktime(&dt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
#include <magic_enum.hpp>
|
||||||
|
|
||||||
#include <daggy/Server.hpp>
|
#include <daggy/Server.hpp>
|
||||||
|
|
||||||
#include <daggy/Serialization.hpp>
|
#include <daggy/Serialization.hpp>
|
||||||
@@ -64,6 +66,12 @@ namespace daggy {
|
|||||||
.bind(&Server::handleRunDAG, this)
|
.bind(&Server::handleRunDAG, this)
|
||||||
.produces(MIME(Application, Json), MIME(Application, Xml))
|
.produces(MIME(Application, Json), MIME(Application, Xml))
|
||||||
.response(Http::Code::Ok, "Run a DAG");
|
.response(Http::Code::Ok, "Run a DAG");
|
||||||
|
// List detailed DAG run
|
||||||
|
dagPath
|
||||||
|
.route(desc_.get("/:runID"))
|
||||||
|
.bind(&Server::handleGetDAGRun, this)
|
||||||
|
.produces(MIME(Application, Json), MIME(Application, Xml))
|
||||||
|
.response(Http::Code::Ok, "Details of a specific DAG run");
|
||||||
|
|
||||||
// List all DAG runs
|
// List all DAG runs
|
||||||
dagPath
|
dagPath
|
||||||
@@ -72,12 +80,7 @@ namespace daggy {
|
|||||||
.produces(MIME(Application, Json), MIME(Application, Xml))
|
.produces(MIME(Application, Json), MIME(Application, Xml))
|
||||||
.response(Http::Code::Ok, "The list of all known DAG Runs");
|
.response(Http::Code::Ok, "The list of all known DAG Runs");
|
||||||
|
|
||||||
// List detailed DAG run
|
|
||||||
dagPath
|
|
||||||
.route(desc_.get("/{id}"))
|
|
||||||
.bind(&Server::handleGetDAGRun, this)
|
|
||||||
.produces(MIME(Application, Json), MIME(Application, Xml))
|
|
||||||
.response(Http::Code::Ok, "Details of a specific DAG run");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -127,7 +130,7 @@ namespace daggy {
|
|||||||
auto runID = logger_.startDAGRun(runName, tasks);
|
auto runID = logger_.startDAGRun(runName, tasks);
|
||||||
auto dag = buildDAGFromTasks(tasks);
|
auto dag = buildDAGFromTasks(tasks);
|
||||||
|
|
||||||
auto fut = runnerPool_.addTask(
|
runnerPool_.addTask(
|
||||||
[this, runID, tasks, dag]() { runDAG(runID, tasks, this->executor_, this->logger_, dag); });
|
[this, runID, tasks, dag]() { runDAG(runID, tasks, this->executor_, this->logger_, dag); });
|
||||||
|
|
||||||
response.send(Pistache::Http::Code::Ok, R"({"runID": )" + std::to_string(runID) + "}");
|
response.send(Pistache::Http::Code::Ok, R"({"runID": )" + std::to_string(runID) + "}");
|
||||||
@@ -135,10 +138,99 @@ namespace daggy {
|
|||||||
|
|
||||||
void Server::handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
void Server::handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
||||||
if (!handleAuth(request, response)) return;
|
if (!handleAuth(request, response)) return;
|
||||||
|
auto dagRuns = logger_.getDAGs(0);
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << '[';
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (const auto &run : dagRuns) {
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
ss << ", ";
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << " {"
|
||||||
|
<< R"("runID": )" << run.runID << ','
|
||||||
|
<< R"("name": )" << std::quoted(run.name) << ","
|
||||||
|
<< R"("startTime": )" << std::quoted(timePointToString(run.startTime)) << ','
|
||||||
|
<< R"("lastUpdate": )" << std::quoted(timePointToString(run.lastUpdate)) << ','
|
||||||
|
<< R"("taskCounts": {)";
|
||||||
|
bool firstState = true;
|
||||||
|
for (const auto &[state, count] : run.taskStateCounts) {
|
||||||
|
if (firstState) {
|
||||||
|
firstState = false;
|
||||||
|
} else {
|
||||||
|
ss << ", ";
|
||||||
|
}
|
||||||
|
ss << std::quoted(magic_enum::enum_name(state)) << ':' << count;
|
||||||
|
}
|
||||||
|
ss << '}' // end of taskCounts
|
||||||
|
<< '}'; // end of item
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << ']';
|
||||||
|
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Server::handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
void Server::handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
||||||
if (!handleAuth(request, response)) return;
|
if (!handleAuth(request, response)) return;
|
||||||
|
if (!request.hasParam(":runID")) { REQ_ERROR(Not_Found, "No runID provided in URL"); }
|
||||||
|
DAGRunID runID = request.param(":runID").as<size_t>();
|
||||||
|
auto run = logger_.getDAGRun(runID);
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "{"
|
||||||
|
<< R"("runID": )" << runID << ','
|
||||||
|
<< R"("name": )" << std::quoted(run.name) << ','
|
||||||
|
<< R"("tasks": )" << tasksToJSON(run.tasks) << ',';
|
||||||
|
|
||||||
|
// task run states
|
||||||
|
ss << R"("taskStates": [ )";
|
||||||
|
first = true;
|
||||||
|
for (const auto &state : run.taskRunStates) {
|
||||||
|
if (first) { first = false; } else { ss << ','; }
|
||||||
|
ss << std::quoted(magic_enum::enum_name(state));
|
||||||
|
}
|
||||||
|
ss << "],";
|
||||||
|
|
||||||
|
// Attempt records
|
||||||
|
first = true;
|
||||||
|
ss << R"("taskAttempts": [ )";
|
||||||
|
for (const auto &attempts : run.taskAttempts) {
|
||||||
|
if (first) { first = false; } else { ss << ','; }
|
||||||
|
ss << '[';
|
||||||
|
bool firstAttempt = true;
|
||||||
|
for (const auto &attempt : attempts) {
|
||||||
|
if (firstAttempt) { firstAttempt = false; } else { ss << ','; }
|
||||||
|
ss << '{'
|
||||||
|
<< R"("startTime":)" << std::quoted(timePointToString(attempt.startTime)) << ','
|
||||||
|
<< R"("stopTime":)" << std::quoted(timePointToString(attempt.stopTime)) << ','
|
||||||
|
<< R"("rc":)" << attempt.rc << ','
|
||||||
|
<< R"("outputLog":)" << std::quoted(attempt.outputLog) << ','
|
||||||
|
<< R"("errorLog":)" << std::quoted(attempt.errorLog) << ','
|
||||||
|
<< R"("executorLog":)" << std::quoted(attempt.executorLog)
|
||||||
|
<< '}';
|
||||||
|
}
|
||||||
|
ss << ']';
|
||||||
|
}
|
||||||
|
ss << "],";
|
||||||
|
|
||||||
|
// DAG state changes
|
||||||
|
first = true;
|
||||||
|
ss << R"("dagStateChanges": [ )";
|
||||||
|
for (const auto &change : run.dagStateChanges) {
|
||||||
|
if (first) { first = false; } else { ss << ','; }
|
||||||
|
ss << '{'
|
||||||
|
<< R"("newState": )" << std::quoted(magic_enum::enum_name(change.newState)) << ','
|
||||||
|
<< R"("time": )" << std::quoted(timePointToString(change.time))
|
||||||
|
<< '}';
|
||||||
|
}
|
||||||
|
ss << "]";
|
||||||
|
ss << '}';
|
||||||
|
|
||||||
|
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Server::handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
void Server::handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
|
||||||
|
|||||||
@@ -174,6 +174,11 @@ namespace daggy {
|
|||||||
}
|
}
|
||||||
std::this_thread::sleep_for(250ms);
|
std::this_thread::sleep_for(250ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dag.allVisited()) {
|
||||||
|
logger.updateDAGRunState(runID, RunState::COMPLETED);
|
||||||
|
}
|
||||||
|
|
||||||
return dag;
|
return dag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ namespace daggy {
|
|||||||
for (const auto &task : record.tasks) {
|
for (const auto &task : record.tasks) {
|
||||||
auto taskStateFile = runRoot / task.name / "states.csv";
|
auto taskStateFile = runRoot / task.name / "states.csv";
|
||||||
if (!fs::exists(taskStateFile)) {
|
if (!fs::exists(taskStateFile)) {
|
||||||
record.runStates.push_back(RunState::QUEUED);
|
record.taskRunStates.push_back(RunState::QUEUED);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -169,9 +169,9 @@ namespace daggy {
|
|||||||
std::stringstream ss{line};
|
std::stringstream ss{line};
|
||||||
while (std::getline(ss, token, ',')) { continue; }
|
while (std::getline(ss, token, ',')) { continue; }
|
||||||
RunState taskState = magic_enum::enum_cast<RunState>(token).value();
|
RunState taskState = magic_enum::enum_cast<RunState>(token).value();
|
||||||
record.runStates.emplace_back(taskState);
|
record.taskRunStates.emplace_back(taskState);
|
||||||
ifh.close();
|
ifh.close();
|
||||||
}
|
}
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,8 +17,7 @@ namespace daggy {
|
|||||||
dagRuns_.push_back({
|
dagRuns_.push_back({
|
||||||
.name = name,
|
.name = name,
|
||||||
.tasks = tasks,
|
.tasks = tasks,
|
||||||
//.runStates = std::vector<RunState>(tasks.size(), RunState::QUEUED),
|
.taskRunStates{tasks.size(), RunState::QUEUED},
|
||||||
.runStates{tasks.size(), RunState::QUEUED},
|
|
||||||
.taskAttempts = std::vector<std::vector<AttemptRecord>>(tasks.size())
|
.taskAttempts = std::vector<std::vector<AttemptRecord>>(tasks.size())
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -56,21 +55,53 @@ namespace daggy {
|
|||||||
|
|
||||||
void OStreamLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) {
|
void OStreamLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) {
|
||||||
std::lock_guard<std::mutex> lock(guard_);
|
std::lock_guard<std::mutex> lock(guard_);
|
||||||
os_ << "Task State Change (" << dagRunID << '/' << taskName << "): " << magic_enum::enum_name(state)
|
auto &dagRun = dagRuns_[dagRunID];
|
||||||
<< std::endl;
|
const auto &tasks = dagRun.tasks;
|
||||||
|
|
||||||
const auto &tasks = dagRuns_[dagRunID].tasks;
|
|
||||||
auto it = std::find_if(tasks.begin(), tasks.end(),
|
auto it = std::find_if(tasks.begin(), tasks.end(),
|
||||||
[&taskName](const Task &a) { return a.name == taskName; });
|
[&taskName](const Task &a) { return a.name == taskName; });
|
||||||
if (it == tasks.end()) throw std::runtime_error("No such task: " + taskName);
|
if (it == tasks.end()) throw std::runtime_error("No such task: " + taskName);
|
||||||
size_t taskID = it - tasks.begin();
|
size_t taskID = it - tasks.begin();
|
||||||
dagRuns_[dagRunID].taskStateChanges.push_back({Clock::now(), taskID, state});
|
dagRun.taskStateChanges.push_back({Clock::now(), taskID, state});
|
||||||
|
dagRun.taskRunStates[taskID] = state;
|
||||||
|
|
||||||
|
os_ << "Task State Change (" << dagRunID << '/' << taskName << " [task_id: " << taskID << "]): "
|
||||||
|
<< magic_enum::enum_name(state)
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Querying
|
// Querying
|
||||||
std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask) { return {}; }
|
std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask) {
|
||||||
|
std::vector<DAGRunSummary> summaries;
|
||||||
|
std::lock_guard<std::mutex> lock(guard_);
|
||||||
|
size_t i = 0;
|
||||||
|
for (const auto &run : dagRuns_) {
|
||||||
|
DAGRunSummary summary{
|
||||||
|
.runID = i,
|
||||||
|
.name = run.name,
|
||||||
|
.runState = run.dagStateChanges.back().newState,
|
||||||
|
.startTime = run.dagStateChanges.front().time,
|
||||||
|
.lastUpdate = std::max<TimePoint>(run.taskStateChanges.back().time,
|
||||||
|
run.dagStateChanges.back().time)
|
||||||
|
};
|
||||||
|
|
||||||
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID) { return dagRuns_[dagRunID]; }
|
std::vector<RunState> states(run.tasks.size());
|
||||||
|
for (const auto &taskUpdate : run.taskStateChanges) {
|
||||||
|
states[taskUpdate.taskID] = taskUpdate.newState;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &taskState : states) {
|
||||||
|
summary.taskStateCounts[taskState]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
summaries.emplace_back(summary);
|
||||||
|
}
|
||||||
|
return summaries;
|
||||||
|
}
|
||||||
|
|
||||||
|
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID) {
|
||||||
|
std::lock_guard<std::mutex> lock(guard_);
|
||||||
|
return dagRuns_[dagRunID];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,11 +4,14 @@
|
|||||||
|
|
||||||
#include <catch2/catch.hpp>
|
#include <catch2/catch.hpp>
|
||||||
#include <pistache/client.h>
|
#include <pistache/client.h>
|
||||||
|
#include <rapidjson/document.h>
|
||||||
|
|
||||||
#include "daggy/Server.hpp"
|
#include "daggy/Server.hpp"
|
||||||
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
|
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
|
||||||
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
|
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
|
||||||
|
|
||||||
|
namespace rj = rapidjson;
|
||||||
|
|
||||||
Pistache::Http::Response
|
Pistache::Http::Response
|
||||||
REQUEST(std::string url, std::string payload = "") {
|
REQUEST(std::string url, std::string payload = "") {
|
||||||
Pistache::Http::Experimental::Client client;
|
Pistache::Http::Experimental::Client client;
|
||||||
@@ -81,12 +84,80 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
|
|||||||
"parents": [ "touch" ]
|
"parents": [ "touch" ]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
})";
|
})";
|
||||||
|
|
||||||
|
|
||||||
|
// Submit, and get the runID
|
||||||
|
daggy::DAGRunID runID = 0;
|
||||||
|
{
|
||||||
|
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun);
|
||||||
|
REQUIRE(response.code() == Pistache::Http::Code::Ok);
|
||||||
|
|
||||||
|
rj::Document doc;
|
||||||
|
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
|
||||||
|
REQUIRE(parseResult);
|
||||||
|
REQUIRE(doc.IsObject());
|
||||||
|
REQUIRE(doc.HasMember("runID"));
|
||||||
|
|
||||||
|
runID = doc["runID"].GetUint64();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure our runID shows up in the list of running DAGs
|
||||||
|
{
|
||||||
|
auto response = REQUEST(baseURL + "/v1/dagrun/");
|
||||||
|
REQUIRE(response.code() == Pistache::Http::Code::Ok);
|
||||||
|
|
||||||
|
rj::Document doc;
|
||||||
|
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
|
||||||
|
REQUIRE(parseResult);
|
||||||
|
REQUIRE(doc.IsArray());
|
||||||
|
REQUIRE(doc.Size() >= 1);
|
||||||
|
|
||||||
|
// Ensure that our DAG is in the list and matches our given DAGRunID
|
||||||
|
bool found = false;
|
||||||
|
const auto &runs = doc.GetArray();
|
||||||
|
for (size_t i = 0; i < runs.Size(); ++i) {
|
||||||
|
const auto &run = runs[i];
|
||||||
|
REQUIRE(run.IsObject());
|
||||||
|
REQUIRE(run.HasMember("name"));
|
||||||
|
REQUIRE(run.HasMember("runID"));
|
||||||
|
|
||||||
|
std::string runName = run["name"].GetString();
|
||||||
|
if (runName == "unit_server") {
|
||||||
|
REQUIRE(run["runID"].GetUint64() == runID);
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
REQUIRE(found);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait until our DAG is complete
|
||||||
|
bool complete = false;
|
||||||
|
while (!complete) {
|
||||||
|
complete = true;
|
||||||
|
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
|
||||||
|
REQUIRE(response.code() == Pistache::Http::Code::Ok);
|
||||||
|
rj::Document doc;
|
||||||
|
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
|
||||||
|
REQUIRE(parseResult);
|
||||||
|
REQUIRE(doc.IsObject());
|
||||||
|
|
||||||
|
REQUIRE(doc.HasMember("taskStates"));
|
||||||
|
const auto &taskStates = doc["taskStates"].GetArray();
|
||||||
|
REQUIRE(taskStates.Size() == 3);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < taskStates.Size(); ++i) {
|
||||||
|
std::string state = taskStates[i].GetString();
|
||||||
|
if (state != "COMPLETED") {
|
||||||
|
complete = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||||
|
}
|
||||||
|
|
||||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun);
|
|
||||||
REQUIRE(response.code() == Pistache::Http::Code::Ok);
|
|
||||||
std::this_thread::sleep_for(std::chrono::seconds(2));
|
std::this_thread::sleep_for(std::chrono::seconds(2));
|
||||||
|
|
||||||
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
|
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
|
||||||
REQUIRE(fs::exists(pth));
|
REQUIRE(fs::exists(pth));
|
||||||
fs::remove(pth);
|
fs::remove(pth);
|
||||||
@@ -94,4 +165,4 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
|
|||||||
}
|
}
|
||||||
|
|
||||||
server.shutdown();
|
server.shutdown();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user