Adding in task attempts drilldown
This commit is contained in:
@@ -392,62 +392,52 @@ namespace daggy::daggyd {
|
|||||||
ss << '}';
|
ss << '}';
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
std::unordered_map<RunState, size_t> stateCounts;
|
||||||
|
for (const auto &[_, state] : run.taskRunStates) {
|
||||||
|
stateCounts[state]++;
|
||||||
|
}
|
||||||
|
|
||||||
ss << R"(<html>
|
ss << R"(<html>
|
||||||
<header>
|
<header>
|
||||||
<title>Details for RunID )"
|
<title>Details for RunID )"
|
||||||
<< runID << R"(</title>
|
<< runID << R"(</title>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
|
|
||||||
<script>mermaid.initialize({startOnLoad:true});</script>
|
|
||||||
</header>
|
</header>
|
||||||
<body>
|
<body>
|
||||||
<center>
|
<center>
|
||||||
<div class="mermaid">
|
<h2>Summary</h2>
|
||||||
)"
|
<table><tr><th>Run ID</th><th>Tag</th><th>State</th>
|
||||||
<< "graph LR;\n";
|
<th>#Tasks</th>
|
||||||
|
<th>Queued</th><th>Running</th><th>Retry</th>
|
||||||
|
<th>Errored</th><th>Completed</th></tr>
|
||||||
|
<tr>)"
|
||||||
|
<< "<td>" << runID << "</td>"
|
||||||
|
<< "<td>" << run.dagSpec.tag << "</td>"
|
||||||
|
<< "<td>" << run.dagStateChanges.back().state << "</td>"
|
||||||
|
<< "<td>" << run.dagSpec.tasks.size() << "</td>"
|
||||||
|
<< "<td>" << stateCounts[RunState::QUEUED] << "</td>"
|
||||||
|
<< "<td>" << stateCounts[RunState::RUNNING] << "</td>"
|
||||||
|
<< "<td>" << stateCounts[RunState::RETRY] << "</td>"
|
||||||
|
<< "<td>" << stateCounts[RunState::ERRORED] << "</td>"
|
||||||
|
<< "<td>" << stateCounts[RunState::COMPLETED] << "</td>"
|
||||||
|
<< "</tr></table>"
|
||||||
|
<< "<h2>Task Details</h2>"
|
||||||
|
<< "<table><tr><th>Task Name</th><th> State</th><th>Last "
|
||||||
|
"Update</th><th> Logs</th></tr>";
|
||||||
|
|
||||||
std::unordered_map<std::string, std::unordered_set<std::string>>
|
|
||||||
taskClassMap;
|
|
||||||
for (const auto &[taskName, task] : run.dagSpec.tasks) {
|
for (const auto &[taskName, task] : run.dagSpec.tasks) {
|
||||||
taskClassMap[task.definedName].emplace(taskName);
|
ss << "<tr>"
|
||||||
|
<< "<td>" << taskName << "</td>"
|
||||||
|
<< "<td>" << run.taskRunStates.at(taskName) << "</td>"
|
||||||
|
<< "<td>"
|
||||||
|
<< timePointToString(run.taskStateChanges.at(taskName).back().time)
|
||||||
|
<< "</td>"
|
||||||
|
<< "<td><a href=\"/v1/dagrun/" << runID << "/task/" << taskName
|
||||||
|
<< "\">Logs</a>"
|
||||||
|
<< "</tr>";
|
||||||
}
|
}
|
||||||
for (const auto &[taskName, task] : run.dagSpec.tasks) {
|
ss << "</table></center></body></html>";
|
||||||
for (const auto &child : task.children) {
|
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||||
for (const auto &ci : taskClassMap[child]) {
|
|
||||||
ss << " " << taskName << "-->" << ci << '\n';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ss << "click " << taskName << " href \"/v1/dagrun/" << runID << "/task/"
|
|
||||||
<< taskName << "\"\n";
|
|
||||||
ss << "style " << taskName << " fill: #";
|
|
||||||
switch (run.taskStateChanges[taskName].back().state) {
|
|
||||||
case RunState::QUEUED:
|
|
||||||
ss << "55f";
|
|
||||||
break;
|
|
||||||
case RunState::RUNNING:
|
|
||||||
ss << "5a5";
|
|
||||||
break;
|
|
||||||
case RunState::RETRY:
|
|
||||||
ss << "55a";
|
|
||||||
break;
|
|
||||||
case RunState::ERRORED:
|
|
||||||
ss << "55F";
|
|
||||||
break;
|
|
||||||
case RunState::COMPLETED:
|
|
||||||
ss << "5f5";
|
|
||||||
break;
|
|
||||||
case RunState::KILLED:
|
|
||||||
ss << "fff";
|
|
||||||
break;
|
|
||||||
case RunState::PAUSED:
|
|
||||||
ss << "333";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
ss << '\n';
|
|
||||||
}
|
|
||||||
ss << "</div><center></body></html>";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
response.send(Pistache::Http::Code::Ok, ss.str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
|
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
|
||||||
@@ -557,14 +547,53 @@ namespace daggy::daggyd {
|
|||||||
|
|
||||||
auto runID = request.param(":runID").as<DAGRunID>();
|
auto runID = request.param(":runID").as<DAGRunID>();
|
||||||
auto taskName = request.param(":taskName").as<std::string>();
|
auto taskName = request.param(":taskName").as<std::string>();
|
||||||
|
bool isJSON = requestIsForJSON(request);
|
||||||
|
|
||||||
try {
|
std::stringstream ss;
|
||||||
auto task = logger_.getTask(runID, taskName);
|
if (isJSON) {
|
||||||
response.send(Pistache::Http::Code::Ok, taskToJSON(task));
|
Task task;
|
||||||
|
try {
|
||||||
|
task = logger_.getTask(runID, taskName);
|
||||||
|
}
|
||||||
|
catch (std::exception &e) {
|
||||||
|
REQ_RESPONSE(Not_Found, e.what());
|
||||||
|
}
|
||||||
|
ss << taskToJSON(task);
|
||||||
}
|
}
|
||||||
catch (std::exception &e) {
|
else {
|
||||||
REQ_RESPONSE(Not_Found, e.what());
|
std::optional<loggers::dag_run::TaskRecord> tr;
|
||||||
|
try {
|
||||||
|
tr.emplace(logger_.getTaskRecord(runID, taskName));
|
||||||
|
}
|
||||||
|
catch (std::exception &e) {
|
||||||
|
REQ_RESPONSE(Not_Found, e.what());
|
||||||
|
}
|
||||||
|
ss << "<html><title>Task Details for " << runID << " / " << taskName
|
||||||
|
<< "</title><body>"
|
||||||
|
<< "<table>"
|
||||||
|
<< "<tr><th>Name</th><td>" << taskName << "</td></tr>"
|
||||||
|
<< "<tr><th>State</th><td>" << tr->state << "</td></tr>"
|
||||||
|
<< "<tr><th>Definition</th><td>" << taskToJSON(tr->task)
|
||||||
|
<< "</td></tr>"
|
||||||
|
<< "<tr><th colspan=2>Attempts</th></tr>";
|
||||||
|
|
||||||
|
std::sort(tr->attempts.begin(), tr->attempts.end(),
|
||||||
|
[](const auto &a, const auto &b) {
|
||||||
|
return a.startTime < b.startTime;
|
||||||
|
});
|
||||||
|
|
||||||
|
for (size_t i = 0; i < tr->attempts.size(); ++i) {
|
||||||
|
const auto &attempt = tr->attempts[i];
|
||||||
|
ss << "<tr><td valign=top>" << timePointToString(attempt.startTime)
|
||||||
|
<< "</td><td><pre>rc: " << attempt.rc
|
||||||
|
<< "\n\nstdout:\n--------------\n"
|
||||||
|
<< attempt.outputLog << "\n\nstderr:\n--------------\n"
|
||||||
|
<< attempt.errorLog << "</pre></td></tr>";
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << "</table></body></html>";
|
||||||
}
|
}
|
||||||
|
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Server::handleGetTaskState(const Pistache::Rest::Request &request,
|
void Server::handleGetTaskState(const Pistache::Rest::Request &request,
|
||||||
|
|||||||
@@ -44,7 +44,11 @@ namespace daggy::loggers::dag_run {
|
|||||||
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
|
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
|
||||||
|
|
||||||
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
|
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
|
||||||
|
|
||||||
|
virtual TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||||
|
const std::string &taskName) = 0;
|
||||||
|
|
||||||
virtual RunState getTaskState(DAGRunID dagRunID,
|
virtual RunState getTaskState(DAGRunID dagRunID,
|
||||||
const std::string &taskName) = 0;
|
const std::string &taskName) = 0;
|
||||||
};
|
};
|
||||||
} // namespace daggy::loggers::dag_run
|
} // namespace daggy::loggers::dag_run
|
||||||
|
|||||||
@@ -15,10 +15,19 @@ namespace daggy::loggers::dag_run {
|
|||||||
RunState state;
|
RunState state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct TaskRecord
|
||||||
|
{
|
||||||
|
Task task;
|
||||||
|
RunState state;
|
||||||
|
std::vector<StateUpdateRecord> stateChanges;
|
||||||
|
std::vector<AttemptRecord> attempts;
|
||||||
|
};
|
||||||
|
|
||||||
// Pretty heavy weight, but
|
// Pretty heavy weight, but
|
||||||
struct DAGRunRecord
|
struct DAGRunRecord
|
||||||
{
|
{
|
||||||
DAGSpec dagSpec;
|
DAGSpec dagSpec;
|
||||||
|
std::unordered_map<std::string, TaskRecord> taskRecords;
|
||||||
std::unordered_map<std::string, RunState> taskRunStates;
|
std::unordered_map<std::string, RunState> taskRunStates;
|
||||||
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
|
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
|
||||||
std::unordered_map<std::string, std::vector<StateUpdateRecord>>
|
std::unordered_map<std::string, std::vector<StateUpdateRecord>>
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ namespace daggy::loggers::dag_run {
|
|||||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||||
|
|
||||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||||
|
TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||||
|
const std::string &taskName) override;
|
||||||
RunState getTaskState(DAGRunID dagRunID,
|
RunState getTaskState(DAGRunID dagRunID,
|
||||||
const std::string &taskName) override;
|
const std::string &taskName) override;
|
||||||
|
|
||||||
|
|||||||
@@ -59,6 +59,8 @@ namespace daggy::loggers::dag_run {
|
|||||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||||
|
|
||||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||||
|
TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||||
|
const std::string &taskName) override;
|
||||||
RunState getTaskState(DAGRunID dagRunID,
|
RunState getTaskState(DAGRunID dagRunID,
|
||||||
const std::string &taskName) override;
|
const std::string &taskName) override;
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,18 @@ namespace daggy::loggers::dag_run {
|
|||||||
return dagRuns_.at(dagRunID).dagSpec.tasks.at(taskName);
|
return dagRuns_.at(dagRunID).dagSpec.tasks.at(taskName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TaskRecord OStreamLogger::getTaskRecord(DAGRunID dagRunID,
|
||||||
|
const std::string &taskName)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(guard_);
|
||||||
|
const auto &run = dagRuns_.at(dagRunID);
|
||||||
|
|
||||||
|
return TaskRecord{.task = run.dagSpec.tasks.at(taskName),
|
||||||
|
.state = run.taskRunStates.at(taskName),
|
||||||
|
.stateChanges = run.taskStateChanges.at(taskName),
|
||||||
|
.attempts = run.taskAttempts.at(taskName)};
|
||||||
|
}
|
||||||
|
|
||||||
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
|
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
|
||||||
const std::string &taskName)
|
const std::string &taskName)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -252,6 +252,48 @@ namespace daggy::loggers::dag_run {
|
|||||||
return taskFromJSON(taskName, resp.as<std::string>());
|
return taskFromJSON(taskName, resp.as<std::string>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TaskRecord RedisLogger::getTaskRecord(DAGRunID dagRunID,
|
||||||
|
const std::string &taskName)
|
||||||
|
{
|
||||||
|
// Task State
|
||||||
|
auto taskState = RunState::_from_string(
|
||||||
|
ctx_.query("HGET %s %s", getTaskStatesKey_(dagRunID).c_str(),
|
||||||
|
taskName.c_str())
|
||||||
|
.as<std::string>()
|
||||||
|
.c_str());
|
||||||
|
|
||||||
|
// task
|
||||||
|
auto task = taskFromJSON(
|
||||||
|
taskName, ctx_.query("HGET %s %s", getTasksKey_(dagRunID).c_str(),
|
||||||
|
taskName.c_str())
|
||||||
|
.as<std::string>());
|
||||||
|
|
||||||
|
// Attempts
|
||||||
|
auto attemptJSONS =
|
||||||
|
ctx_.query("LRANGE %s 0 -1",
|
||||||
|
getTaskAttemptKey_(dagRunID, taskName).c_str())
|
||||||
|
.asList<std::string>();
|
||||||
|
std::vector<AttemptRecord> attempts;
|
||||||
|
std::transform(attemptJSONS.begin(), attemptJSONS.end(),
|
||||||
|
std::back_inserter(attempts),
|
||||||
|
[](const auto &s) { return attemptRecordFromJSON(s); });
|
||||||
|
|
||||||
|
// Populate stateUpdates
|
||||||
|
auto taskStateUpdates =
|
||||||
|
ctx_.query("LRANGE %s 0 -1",
|
||||||
|
getTaskStateUpdateKey_(dagRunID, taskName).c_str())
|
||||||
|
.asList<std::string>();
|
||||||
|
std::vector<StateUpdateRecord> stateUpdates;
|
||||||
|
std::transform(taskStateUpdates.begin(), taskStateUpdates.end(),
|
||||||
|
std::back_inserter(stateUpdates),
|
||||||
|
[](const auto &s) { return stateUpdateRecordFromJSON(s); });
|
||||||
|
|
||||||
|
return TaskRecord{.task = task,
|
||||||
|
.state = taskState,
|
||||||
|
.stateChanges = stateUpdates,
|
||||||
|
.attempts = attempts};
|
||||||
|
}
|
||||||
|
|
||||||
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
|
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
|
||||||
const std::string &taskName)
|
const std::string &taskName)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user