Adding in task attempts drilldown

This commit is contained in:
Ian Roddis
2022-01-06 15:20:06 -04:00
parent 1786b53f7b
commit 856e5bd2f4
7 changed files with 151 additions and 51 deletions

View File

@@ -392,62 +392,52 @@ namespace daggy::daggyd {
ss << '}';
}
else {
std::unordered_map<RunState, size_t> stateCounts;
for (const auto &[_, state] : run.taskRunStates) {
stateCounts[state]++;
}
ss << R"(<html>
<header>
<title>Details for RunID )"
<< runID << R"(</title>
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
<script>mermaid.initialize({startOnLoad:true});</script>
</header>
<body>
<center>
<div class="mermaid">
)"
<< "graph LR;\n";
<h2>Summary</h2>
<table><tr><th>Run ID</th><th>Tag</th><th>State</th>
<th>#Tasks</th>
<th>Queued</th><th>Running</th><th>Retry</th>
<th>Errored</th><th>Completed</th></tr>
<tr>)"
<< "<td>" << runID << "</td>"
<< "<td>" << run.dagSpec.tag << "</td>"
<< "<td>" << run.dagStateChanges.back().state << "</td>"
<< "<td>" << run.dagSpec.tasks.size() << "</td>"
<< "<td>" << stateCounts[RunState::QUEUED] << "</td>"
<< "<td>" << stateCounts[RunState::RUNNING] << "</td>"
<< "<td>" << stateCounts[RunState::RETRY] << "</td>"
<< "<td>" << stateCounts[RunState::ERRORED] << "</td>"
<< "<td>" << stateCounts[RunState::COMPLETED] << "</td>"
<< "</tr></table>"
<< "<h2>Task Details</h2>"
<< "<table><tr><th>Task Name</th><th> State</th><th>Last "
"Update</th><th> Logs</th></tr>";
std::unordered_map<std::string, std::unordered_set<std::string>>
taskClassMap;
for (const auto &[taskName, task] : run.dagSpec.tasks) {
taskClassMap[task.definedName].emplace(taskName);
ss << "<tr>"
<< "<td>" << taskName << "</td>"
<< "<td>" << run.taskRunStates.at(taskName) << "</td>"
<< "<td>"
<< timePointToString(run.taskStateChanges.at(taskName).back().time)
<< "</td>"
<< "<td><a href=\"/v1/dagrun/" << runID << "/task/" << taskName
<< "\">Logs</a>"
<< "</tr>";
}
for (const auto &[taskName, task] : run.dagSpec.tasks) {
for (const auto &child : task.children) {
for (const auto &ci : taskClassMap[child]) {
ss << " " << taskName << "-->" << ci << '\n';
}
}
ss << "click " << taskName << " href \"/v1/dagrun/" << runID << "/task/"
<< taskName << "\"\n";
ss << "style " << taskName << " fill: #";
switch (run.taskStateChanges[taskName].back().state) {
case RunState::QUEUED:
ss << "55f";
break;
case RunState::RUNNING:
ss << "5a5";
break;
case RunState::RETRY:
ss << "55a";
break;
case RunState::ERRORED:
ss << "55F";
break;
case RunState::COMPLETED:
ss << "5f5";
break;
case RunState::KILLED:
ss << "fff";
break;
case RunState::PAUSED:
ss << "333";
break;
}
ss << '\n';
}
ss << "</div><center></body></html>";
ss << "</table></center></body></html>";
response.send(Pistache::Http::Code::Ok, ss.str());
}
response.send(Pistache::Http::Code::Ok, ss.str());
}
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
@@ -557,14 +547,53 @@ namespace daggy::daggyd {
auto runID = request.param(":runID").as<DAGRunID>();
auto taskName = request.param(":taskName").as<std::string>();
bool isJSON = requestIsForJSON(request);
try {
auto task = logger_.getTask(runID, taskName);
response.send(Pistache::Http::Code::Ok, taskToJSON(task));
std::stringstream ss;
if (isJSON) {
Task task;
try {
task = logger_.getTask(runID, taskName);
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
ss << taskToJSON(task);
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
else {
std::optional<loggers::dag_run::TaskRecord> tr;
try {
tr.emplace(logger_.getTaskRecord(runID, taskName));
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
ss << "<html><title>Task Details for " << runID << " / " << taskName
<< "</title><body>"
<< "<table>"
<< "<tr><th>Name</th><td>" << taskName << "</td></tr>"
<< "<tr><th>State</th><td>" << tr->state << "</td></tr>"
<< "<tr><th>Definition</th><td>" << taskToJSON(tr->task)
<< "</td></tr>"
<< "<tr><th colspan=2>Attempts</th></tr>";
std::sort(tr->attempts.begin(), tr->attempts.end(),
[](const auto &a, const auto &b) {
return a.startTime < b.startTime;
});
for (size_t i = 0; i < tr->attempts.size(); ++i) {
const auto &attempt = tr->attempts[i];
ss << "<tr><td valign=top>" << timePointToString(attempt.startTime)
<< "</td><td><pre>rc: " << attempt.rc
<< "\n\nstdout:\n--------------\n"
<< attempt.outputLog << "\n\nstderr:\n--------------\n"
<< attempt.errorLog << "</pre></td></tr>";
}
ss << "</table></body></html>";
}
response.send(Pistache::Http::Code::Ok, ss.str());
}
void Server::handleGetTaskState(const Pistache::Rest::Request &request,

View File

@@ -44,7 +44,11 @@ namespace daggy::loggers::dag_run {
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
// Return the TaskRecord (definition, current state, state changes and
// attempts) for the task named taskName within run dagRunID.
// NOTE(review): the implementations visible in this change signal an
// unknown run or task by throwing; confirm whether that is part of the
// contract every logger must honor.
virtual TaskRecord getTaskRecord(DAGRunID dagRunID,
const std::string &taskName) = 0;
virtual RunState getTaskState(DAGRunID dagRunID,
const std::string &taskName) = 0;
const std::string &taskName) = 0;
};
} // namespace daggy::loggers::dag_run

View File

@@ -15,10 +15,19 @@ namespace daggy::loggers::dag_run {
RunState state;
};
// Everything a run logger holds about a single task of a single DAG run.
// Instances are built with designated initializers, so the declaration order
// of the members below must be preserved.
struct TaskRecord
{
// The task's definition.
Task task;
// The task's state as last recorded by the logger.
RunState state;
// State updates recorded for the task.
// NOTE(review): presumably in chronological order -- confirm with the loggers.
std::vector<StateUpdateRecord> stateChanges;
// One entry per recorded execution attempt of the task.
std::vector<AttemptRecord> attempts;
};
// Pretty heavy weight, but
struct DAGRunRecord
{
DAGSpec dagSpec;
std::unordered_map<std::string, TaskRecord> taskRecords;
std::unordered_map<std::string, RunState> taskRunStates;
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
std::unordered_map<std::string, std::vector<StateUpdateRecord>>

View File

@@ -44,6 +44,8 @@ namespace daggy::loggers::dag_run {
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
// Return the TaskRecord (definition, current state, state changes and
// attempts) this logger holds for task taskName of run dagRunID.
TaskRecord getTaskRecord(DAGRunID dagRunID,
const std::string &taskName) override;
RunState getTaskState(DAGRunID dagRunID,
const std::string &taskName) override;

View File

@@ -59,6 +59,8 @@ namespace daggy::loggers::dag_run {
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
// Return the TaskRecord (definition, current state, state changes and
// attempts) this logger holds for task taskName of run dagRunID.
TaskRecord getTaskRecord(DAGRunID dagRunID,
const std::string &taskName) override;
RunState getTaskState(DAGRunID dagRunID,
const std::string &taskName) override;

View File

@@ -170,6 +170,18 @@ namespace daggy::loggers::dag_run {
return dagRuns_.at(dagRunID).dagSpec.tasks.at(taskName);
}
// Build a snapshot TaskRecord for task `taskName` of run `dagRunID` from the
// in-memory run records, copying the data while holding guard_.
//
// The run, the task definition and the task's current state are mandatory:
// the .at() lookups throw (std::out_of_range for the map lookups) when any of
// them is missing, which callers use to report "not found".
//
// The state-change and attempt histories are optional: a task that has no
// entry in those maps yields empty vectors instead of an exception, matching
// RedisLogger::getTaskRecord, where the histories are read as lists that are
// simply empty when nothing has been stored.
TaskRecord OStreamLogger::getTaskRecord(DAGRunID dagRunID,
                                        const std::string &taskName)
{
  std::lock_guard<std::mutex> lock(guard_);
  const auto &run = dagRuns_.at(dagRunID);

  // Mandatory parts first, in member order, so an unknown task still throws
  // before anything else is looked up.
  TaskRecord record{.task = run.dagSpec.tasks.at(taskName),
                    .state = run.taskRunStates.at(taskName),
                    .stateChanges = {},
                    .attempts = {}};

  // Optional histories: absent key == nothing recorded yet.
  if (const auto changesIt = run.taskStateChanges.find(taskName);
      changesIt != run.taskStateChanges.end()) {
    record.stateChanges = changesIt->second;
  }
  if (const auto attemptsIt = run.taskAttempts.find(taskName);
      attemptsIt != run.taskAttempts.end()) {
    record.attempts = attemptsIt->second;
  }

  return record;
}
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
const std::string &taskName)
{

View File

@@ -252,6 +252,48 @@ namespace daggy::loggers::dag_run {
return taskFromJSON(taskName, resp.as<std::string>());
}
// Collect what Redis holds about task `taskName` of run `dagRunID` into a
// TaskRecord. Four queries are issued, in this order:
//   1. HGET on the key from getTaskStatesKey_      -> current state name
//   2. HGET on the key from getTasksKey_           -> task definition (JSON)
//   3. LRANGE 0 -1 on getTaskAttemptKey_           -> attempt records (JSON)
//   4. LRANGE 0 -1 on getTaskStateUpdateKey_       -> state updates (JSON)
// Errors raised by the queries or by the JSON / enum parsers propagate to the
// caller unchanged.
TaskRecord RedisLogger::getTaskRecord(DAGRunID dagRunID,
                                      const std::string &taskName)
{
  // 1. Current state, stored by name and parsed back into the enum.
  const auto stateName =
      ctx_.query("HGET %s %s", getTaskStatesKey_(dagRunID).c_str(),
                 taskName.c_str())
          .as<std::string>();
  auto currentState = RunState::_from_string(stateName.c_str());

  // 2. Task definition.
  auto definition = taskFromJSON(
      taskName, ctx_.query("HGET %s %s", getTasksKey_(dagRunID).c_str(),
                           taskName.c_str())
                    .as<std::string>());

  // 3. Every recorded attempt, decoded one JSON document at a time.
  auto encodedAttempts =
      ctx_.query("LRANGE %s 0 -1",
                 getTaskAttemptKey_(dagRunID, taskName).c_str())
          .asList<std::string>();
  std::vector<AttemptRecord> decodedAttempts;
  for (const auto &json : encodedAttempts) {
    decodedAttempts.push_back(attemptRecordFromJSON(json));
  }

  // 4. Every recorded state update, decoded the same way.
  auto encodedUpdates =
      ctx_.query("LRANGE %s 0 -1",
                 getTaskStateUpdateKey_(dagRunID, taskName).c_str())
          .asList<std::string>();
  std::vector<StateUpdateRecord> decodedUpdates;
  for (const auto &json : encodedUpdates) {
    decodedUpdates.push_back(stateUpdateRecordFromJSON(json));
  }

  return TaskRecord{.task = definition,
                    .state = currentState,
                    .stateChanges = decodedUpdates,
                    .attempts = decodedAttempts};
}
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
const std::string &taskName)
{