Adding in task attempts drilldown
This commit is contained in:
@@ -392,62 +392,52 @@ namespace daggy::daggyd {
|
||||
ss << '}';
|
||||
}
|
||||
else {
|
||||
std::unordered_map<RunState, size_t> stateCounts;
|
||||
for (const auto &[_, state] : run.taskRunStates) {
|
||||
stateCounts[state]++;
|
||||
}
|
||||
|
||||
ss << R"(<html>
|
||||
<header>
|
||||
<title>Details for RunID )"
|
||||
<< runID << R"(</title>
|
||||
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
|
||||
<script>mermaid.initialize({startOnLoad:true});</script>
|
||||
</header>
|
||||
<body>
|
||||
<center>
|
||||
<div class="mermaid">
|
||||
)"
|
||||
<< "graph LR;\n";
|
||||
<h2>Summary</h2>
|
||||
<table><tr><th>Run ID</th><th>Tag</th><th>State</th>
|
||||
<th>#Tasks</th>
|
||||
<th>Queued</th><th>Running</th><th>Retry</th>
|
||||
<th>Errored</th><th>Completed</th></tr>
|
||||
<tr>)"
|
||||
<< "<td>" << runID << "</td>"
|
||||
<< "<td>" << run.dagSpec.tag << "</td>"
|
||||
<< "<td>" << run.dagStateChanges.back().state << "</td>"
|
||||
<< "<td>" << run.dagSpec.tasks.size() << "</td>"
|
||||
<< "<td>" << stateCounts[RunState::QUEUED] << "</td>"
|
||||
<< "<td>" << stateCounts[RunState::RUNNING] << "</td>"
|
||||
<< "<td>" << stateCounts[RunState::RETRY] << "</td>"
|
||||
<< "<td>" << stateCounts[RunState::ERRORED] << "</td>"
|
||||
<< "<td>" << stateCounts[RunState::COMPLETED] << "</td>"
|
||||
<< "</tr></table>"
|
||||
<< "<h2>Task Details</h2>"
|
||||
<< "<table><tr><th>Task Name</th><th> State</th><th>Last "
|
||||
"Update</th><th> Logs</th></tr>";
|
||||
|
||||
std::unordered_map<std::string, std::unordered_set<std::string>>
|
||||
taskClassMap;
|
||||
for (const auto &[taskName, task] : run.dagSpec.tasks) {
|
||||
taskClassMap[task.definedName].emplace(taskName);
|
||||
ss << "<tr>"
|
||||
<< "<td>" << taskName << "</td>"
|
||||
<< "<td>" << run.taskRunStates.at(taskName) << "</td>"
|
||||
<< "<td>"
|
||||
<< timePointToString(run.taskStateChanges.at(taskName).back().time)
|
||||
<< "</td>"
|
||||
<< "<td><a href=\"/v1/dagrun/" << runID << "/task/" << taskName
|
||||
<< "\">Logs</a>"
|
||||
<< "</tr>";
|
||||
}
|
||||
for (const auto &[taskName, task] : run.dagSpec.tasks) {
|
||||
for (const auto &child : task.children) {
|
||||
for (const auto &ci : taskClassMap[child]) {
|
||||
ss << " " << taskName << "-->" << ci << '\n';
|
||||
}
|
||||
}
|
||||
ss << "click " << taskName << " href \"/v1/dagrun/" << runID << "/task/"
|
||||
<< taskName << "\"\n";
|
||||
ss << "style " << taskName << " fill: #";
|
||||
switch (run.taskStateChanges[taskName].back().state) {
|
||||
case RunState::QUEUED:
|
||||
ss << "55f";
|
||||
break;
|
||||
case RunState::RUNNING:
|
||||
ss << "5a5";
|
||||
break;
|
||||
case RunState::RETRY:
|
||||
ss << "55a";
|
||||
break;
|
||||
case RunState::ERRORED:
|
||||
ss << "55F";
|
||||
break;
|
||||
case RunState::COMPLETED:
|
||||
ss << "5f5";
|
||||
break;
|
||||
case RunState::KILLED:
|
||||
ss << "fff";
|
||||
break;
|
||||
case RunState::PAUSED:
|
||||
ss << "333";
|
||||
break;
|
||||
}
|
||||
ss << '\n';
|
||||
}
|
||||
ss << "</div><center></body></html>";
|
||||
ss << "</table></center></body></html>";
|
||||
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||
}
|
||||
|
||||
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||
}
|
||||
|
||||
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
|
||||
@@ -557,14 +547,53 @@ namespace daggy::daggyd {
|
||||
|
||||
auto runID = request.param(":runID").as<DAGRunID>();
|
||||
auto taskName = request.param(":taskName").as<std::string>();
|
||||
bool isJSON = requestIsForJSON(request);
|
||||
|
||||
try {
|
||||
auto task = logger_.getTask(runID, taskName);
|
||||
response.send(Pistache::Http::Code::Ok, taskToJSON(task));
|
||||
std::stringstream ss;
|
||||
if (isJSON) {
|
||||
Task task;
|
||||
try {
|
||||
task = logger_.getTask(runID, taskName);
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
REQ_RESPONSE(Not_Found, e.what());
|
||||
}
|
||||
ss << taskToJSON(task);
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
REQ_RESPONSE(Not_Found, e.what());
|
||||
else {
|
||||
std::optional<loggers::dag_run::TaskRecord> tr;
|
||||
try {
|
||||
tr.emplace(logger_.getTaskRecord(runID, taskName));
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
REQ_RESPONSE(Not_Found, e.what());
|
||||
}
|
||||
ss << "<html><title>Task Details for " << runID << " / " << taskName
|
||||
<< "</title><body>"
|
||||
<< "<table>"
|
||||
<< "<tr><th>Name</th><td>" << taskName << "</td></tr>"
|
||||
<< "<tr><th>State</th><td>" << tr->state << "</td></tr>"
|
||||
<< "<tr><th>Definition</th><td>" << taskToJSON(tr->task)
|
||||
<< "</td></tr>"
|
||||
<< "<tr><th colspan=2>Attempts</th></tr>";
|
||||
|
||||
std::sort(tr->attempts.begin(), tr->attempts.end(),
|
||||
[](const auto &a, const auto &b) {
|
||||
return a.startTime < b.startTime;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < tr->attempts.size(); ++i) {
|
||||
const auto &attempt = tr->attempts[i];
|
||||
ss << "<tr><td valign=top>" << timePointToString(attempt.startTime)
|
||||
<< "</td><td><pre>rc: " << attempt.rc
|
||||
<< "\n\nstdout:\n--------------\n"
|
||||
<< attempt.outputLog << "\n\nstderr:\n--------------\n"
|
||||
<< attempt.errorLog << "</pre></td></tr>";
|
||||
}
|
||||
|
||||
ss << "</table></body></html>";
|
||||
}
|
||||
response.send(Pistache::Http::Code::Ok, ss.str());
|
||||
}
|
||||
|
||||
void Server::handleGetTaskState(const Pistache::Rest::Request &request,
|
||||
|
||||
@@ -44,7 +44,11 @@ namespace daggy::loggers::dag_run {
|
||||
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
|
||||
|
||||
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
|
||||
|
||||
/// Fetch the complete record for one task of a DAG run.
///
/// @param dagRunID  identifier of the DAG run the task belongs to
/// @param taskName  name of the task within that run
/// @return a TaskRecord (task definition, state, state changes, attempts)
///
/// NOTE(review): the HTTP handler wraps this call in a
/// `catch (std::exception &)` and answers Not_Found, so implementations
/// are presumably expected to throw when the run or task is unknown —
/// confirm per backend.
virtual TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||
const std::string &taskName) = 0;
|
||||
|
||||
virtual RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) = 0;
|
||||
const std::string &taskName) = 0;
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run
|
||||
|
||||
@@ -15,10 +15,19 @@ namespace daggy::loggers::dag_run {
|
||||
RunState state;
|
||||
};
|
||||
|
||||
struct TaskRecord
|
||||
{
|
||||
Task task;
|
||||
RunState state;
|
||||
std::vector<StateUpdateRecord> stateChanges;
|
||||
std::vector<AttemptRecord> attempts;
|
||||
};
|
||||
|
||||
// Pretty heavyweight: bundles the DAG spec with the per-task records of a run.
|
||||
struct DAGRunRecord
|
||||
{
|
||||
DAGSpec dagSpec;
|
||||
std::unordered_map<std::string, TaskRecord> taskRecords;
|
||||
std::unordered_map<std::string, RunState> taskRunStates;
|
||||
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
|
||||
std::unordered_map<std::string, std::vector<StateUpdateRecord>>
|
||||
|
||||
@@ -44,6 +44,8 @@ namespace daggy::loggers::dag_run {
|
||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||
|
||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||
TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
|
||||
|
||||
@@ -59,6 +59,8 @@ namespace daggy::loggers::dag_run {
|
||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||
|
||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||
TaskRecord getTaskRecord(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
|
||||
|
||||
@@ -170,6 +170,18 @@ namespace daggy::loggers::dag_run {
|
||||
return dagRuns_.at(dagRunID).dagSpec.tasks.at(taskName);
|
||||
}
|
||||
|
||||
/// Build a TaskRecord for `taskName` from the in-memory copy of run
/// `dagRunID`.
///
/// The logger's mutex is held for the whole call, so the four pieces are
/// copied out of the same run entry without interleaving with other calls
/// that take the same lock.
///
/// Every lookup uses `.at()`, so an unknown run ID or task name surfaces as
/// the exception thrown by the container (not caught here).
TaskRecord OStreamLogger::getTaskRecord(DAGRunID dagRunID,
                                        const std::string &taskName)
{
  std::lock_guard<std::mutex> lock(guard_);

  const auto &dagRun = dagRuns_.at(dagRunID);

  // Resolve every piece first (same lookup order as the record's members),
  // then copy them into the returned aggregate.
  const auto &taskDef      = dagRun.dagSpec.tasks.at(taskName);
  const auto &currentState = dagRun.taskRunStates.at(taskName);
  const auto &changes      = dagRun.taskStateChanges.at(taskName);
  const auto &attemptList  = dagRun.taskAttempts.at(taskName);

  return TaskRecord{.task         = taskDef,
                    .state        = currentState,
                    .stateChanges = changes,
                    .attempts     = attemptList};
}
|
||||
|
||||
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName)
|
||||
{
|
||||
|
||||
@@ -252,6 +252,48 @@ namespace daggy::loggers::dag_run {
|
||||
return taskFromJSON(taskName, resp.as<std::string>());
|
||||
}
|
||||
|
||||
/// Assemble the complete record for one task of a DAG run from Redis.
///
/// Issues four queries, in this order:
///   1. HGET on the run's task-states hash  -> parsed with RunState::_from_string
///   2. HGET on the run's tasks hash        -> parsed with taskFromJSON
///   3. LRANGE 0 -1 on the attempt list     -> each entry via attemptRecordFromJSON
///   4. LRANGE 0 -1 on the state-update list-> each entry via stateUpdateRecordFromJSON
///
/// @param dagRunID  identifier of the DAG run the task belongs to
/// @param taskName  name of the task within that run
/// @return the populated TaskRecord
///
/// Nothing is caught here: any exception raised by a query, a reply
/// conversion, or a JSON parser propagates to the caller.
TaskRecord RedisLogger::getTaskRecord(DAGRunID dagRunID,
                                      const std::string &taskName)
{
  // Task state (queried first, before the record is built, so the query
  // order stays state -> task -> attempts -> state updates).
  const auto taskState = RunState::_from_string(
      ctx_.query("HGET %s %s", getTaskStatesKey_(dagRunID).c_str(),
                 taskName.c_str())
          .as<std::string>()
          .c_str());

  // Build the result in place; the two histories are appended directly into
  // it below, so nothing has to be copied into the record on return.
  TaskRecord record{
      .task = taskFromJSON(
          taskName, ctx_.query("HGET %s %s", getTasksKey_(dagRunID).c_str(),
                               taskName.c_str())
                        .as<std::string>()),
      .state        = taskState,
      .stateChanges = {},
      .attempts     = {}};

  // Attempts
  const auto attemptJSONS =
      ctx_.query("LRANGE %s 0 -1",
                 getTaskAttemptKey_(dagRunID, taskName).c_str())
          .asList<std::string>();
  std::transform(attemptJSONS.begin(), attemptJSONS.end(),
                 std::back_inserter(record.attempts),
                 [](const auto &s) { return attemptRecordFromJSON(s); });

  // State updates
  const auto taskStateUpdates =
      ctx_.query("LRANGE %s 0 -1",
                 getTaskStateUpdateKey_(dagRunID, taskName).c_str())
          .asList<std::string>();
  std::transform(taskStateUpdates.begin(), taskStateUpdates.end(),
                 std::back_inserter(record.stateChanges),
                 [](const auto &s) { return stateUpdateRecordFromJSON(s); });

  return record;
}
|
||||
|
||||
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user