Files
daggy/libdaggy/src/executors/task/SlurmTaskExecutor.cpp
2022-01-12 12:50:46 -04:00

365 lines
12 KiB
C++

#include <iomanip>
#include <iterator>
#include <mutex>
#include <stdexcept>
#ifdef DAGGY_ENABLE_SLURM
#include <slurm/slurm.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <csignal>
#include <cstdlib>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
#include <filesystem>
#include <fstream>
#include <random>
namespace fs = std::filesystem;
namespace daggy::executors::task {
std::string getUniqueTag(size_t nChars = 6)
{
    // Produce a random alphanumeric tag of nChars characters, used to make
    // per-attempt temp-file names unique.
    //
    // BUGFIX: the RNG is now thread_local rather than a shared static.
    // execute() may run on multiple threads, and concurrent use of one
    // std::mt19937 instance is a data race.
    static constexpr char alphabet[] =
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    thread_local std::mt19937 rng{std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(0, 61);
    std::string result(nChars, '\0');
    for (auto &c : result) {
        c = alphabet[dist(rng)];
    }
    return result;
}
/// Reads the whole of file `fn` into `dest` and deletes the file.
/// If `fn` does not exist, `dest` is left untouched.
void readAndClean(const fs::path &fn, std::string &dest)
{
    if (!fs::exists(fn))
        return; // nothing captured for this stream; keep dest as-is
    std::ifstream ifh(fn);
    std::string contents(std::istreambuf_iterator<char>{ifh}, {});
    ifh.close();
    // fn is a single log file; fs::remove is the right primitive here
    // (remove_all is for recursively deleting directory trees).
    fs::remove(fn);
    dest.swap(contents);
}
SlurmTaskExecutor::SlurmTaskExecutor()
    : running_(true)
    , monitorWorker_(&SlurmTaskExecutor::monitor, this)
{
    // NOTE(review): the monitor thread starts in the init list, before this
    // body runs; it only touches running_/promiseGuard_/runningJobs_, so the
    // ordering looks safe, but confirm if members are added.

    // Advertise the submission context through the usual SLURM_* variables.
    const std::string priority = std::to_string(getpriority(PRIO_PROCESS, 0));
    const std::string submitDir = fs::current_path().string();

    const size_t MAX_HOSTNAME_LENGTH = 50;
    std::string submitHost(MAX_HOSTNAME_LENGTH, '\0');
    gethostname(submitHost.data(), MAX_HOSTNAME_LENGTH);
    // Trim at the NUL terminator. If the hostname filled the whole buffer
    // there is no NUL; resize(npos) would throw, so guard it.
    if (const auto nul = submitHost.find('\0'); nul != std::string::npos)
        submitHost.resize(nul);

    // umask() can only be read by setting it, so set and restore.
    uint32_t mask = umask(0);
    umask(mask);
    std::stringstream ss;
    ss << '0' << uint32_t{((mask >> 6) & 07)} << uint32_t{((mask >> 3) & 07)}
       << uint32_t{(mask & 07)};

    // BUGFIX: the previous code called putenv() with c_str() of local
    // strings (and of a temporary from ss.str()). putenv() keeps the
    // caller's pointer alive in the environment, so those entries dangled
    // as soon as this constructor returned. setenv() copies its arguments,
    // which is the behavior we need.
    setenv("SLURM_PRIO_PROCESS", priority.c_str(), 1);
    setenv("SLURM_SUBMIT_DIR", submitDir.c_str(), 1);
    setenv("SLURM_SUBMIT_HOST", submitHost.c_str(), 1);
    setenv("SLURM_UMASK", ss.str().c_str(), 1);
}
SlurmTaskExecutor::~SlurmTaskExecutor()
{
    // Stop the polling thread first so it cannot race with the cleanup below.
    running_ = false;
    monitorWorker_.join();

    // Jobs still in flight will never be polled again; resolve each promise
    // with a sentinel failure record so no caller blocks forever.
    std::lock_guard<std::mutex> lock(promiseGuard_);
    for (auto &entry : runningJobs_) {
        entry.second.fut->set(
            AttemptRecord{.rc = -1, .executorLog = "executor killed"});
    }
    runningJobs_.clear();
}
std::string SlurmTaskExecutor::description() const
{
    // Human-readable identifier for this executor implementation.
    static const std::string kName{"SlurmTaskExecutor"};
    return kName;
}
// Validates the job to ensure that all required values are set and are of
// the right type,
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
const std::unordered_set<std::string> requiredFields{
"minCPUs", "minMemoryMB", "minTmpDiskMB", "priority",
"timeLimitSeconds", "userID", "workDir", "tmpDir"};
for (const auto &requiredField : requiredFields) {
if (job.count(requiredField) == 0) {
throw std::runtime_error("Missing field " + requiredField);
}
}
// Require command or commandString
if (job.count("command") + job.count("commandString") == 0)
throw std::runtime_error(
"Either command or commandString must be specified");
if (job.count("environment")) {
if (!std::holds_alternative<Command>(job.at("environment")))
throw std::runtime_error(R"(environment must be an array of strings)");
}
return true;
}
std::vector<ConfigValues> SlurmTaskExecutor::expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues;
auto command =
(job.count("command") == 0 ? Command{}
: std::get<Command>(job.at("command")));
auto environment = (job.count("environment") == 0
? Command{}
: std::get<Command>(job.at("environment")));
Command both(command);
std::copy(environment.begin(), environment.end(), std::back_inserter(both));
for (const auto &parts : interpolateValues(both, expansionValues)) {
ConfigValues newCommand{job};
newCommand["command"] =
Command(parts.begin(), parts.begin() + command.size());
newCommand["environment"] =
Command(parts.begin() + command.size(), parts.end());
newValues.emplace_back(newCommand);
}
return newValues;
}
/// Submits a single task attempt to slurm as a batch job and registers it
/// with the monitor thread, returning a future the monitor resolves when the
/// job reaches a terminal state.
/// @throws std::runtime_error if slurm rejects the submission.
TaskFuture SlurmTaskExecutor::execute(DAGRunID runID,
                                      const std::string &taskName,
                                      const Task &task)
{
    std::stringstream executorLog;
    const auto &job = task.job;
    // A random suffix keeps the stdout/stderr temp files of concurrent
    // attempts of the same task from colliding.
    const auto uniqueTaskName = taskName + "_" + getUniqueTag(6);
    fs::path tmpDir = std::get<std::string>(job.at("tmpDir"));
    std::string stdoutFile = (tmpDir / (uniqueTaskName + ".stdout")).string();
    std::string stderrFile = (tmpDir / (uniqueTaskName + ".stderr")).string();
    std::string workDir = std::get<std::string>(job.at("workDir"));
    // Convert command to argc / argv
    // NOTE(review): argv starts with a nullptr entry and is also
    // nullptr-terminated below, and jd.argc counts both sentinels. Verify
    // against the slurm batch-submit API that this is the expected layout.
    std::vector<char *> argv{nullptr};
    // Populate the command
    Command command;
    if (task.job.count("commandString")) {
        // A commandString is split on whitespace, honoring double quotes
        // via std::quoted.
        std::stringstream ss;
        ss << std::get<std::string>(task.job.at("commandString"));
        std::string tok;
        while (ss >> std::quoted(tok)) {
            command.push_back(tok);
        }
    }
    else {
        const auto cmd = std::get<Command>(task.job.at("command"));
        std::copy(cmd.begin(), cmd.end(), std::back_inserter(command));
    }
    // argv borrows the strings owned by `command`; that is safe because
    // slurm_submit_batch_job below is called synchronously while `command`
    // is still alive.
    std::transform(
        command.begin(), command.end(), std::back_inserter(argv),
        [](const std::string &s) { return const_cast<char *>(s.c_str()); });
    argv.push_back(nullptr);
    // NOTE(review): env begins with a single empty string, presumably a
    // placeholder slurm expects — confirm.
    std::vector<std::string> env{""};
    std::vector<char *> envp;
    auto it = task.job.find("environment");
    if (it != task.job.end()) {
        const auto environment = std::get<Command>(task.job.at("environment"));
        std::copy(environment.begin(), environment.end(),
                  std::back_inserter(env));
    }
    // envp borrows from `env`, same lifetime argument as argv/command above.
    std::transform(
        env.begin(), env.end(), std::back_inserter(envp),
        [](const std::string &s) { return const_cast<char *>(s.c_str()); });
    // Minimal wrapper script: execute whatever arguments are passed in.
    char script[] = "#!/bin/bash\n$@\n";
    char stdinFile[] = "/dev/null";
    // taken from slurm
    int error_code;
    job_desc_msg_t jd;
    submit_response_msg_t *resp_msg;
    slurm_init_job_desc_msg(&jd);
    jd.contiguous = 1;
    jd.name = const_cast<char *>(taskName.c_str());
    // Resource limits all arrive as strings in the job config.
    jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs")));
    jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB")));
    jd.pn_min_tmp_disk =
        std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
    jd.priority = std::stoi(std::get<std::string>(job.at("priority")));
    jd.shared = 0;
    jd.time_limit =
        std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
    jd.min_nodes = 1;
    jd.user_id = std::stoi(std::get<std::string>(job.at("userID")));
    jd.argv = argv.data();
    jd.argc = argv.size();
    // TODO figure out the script to run
    jd.script = script;
    jd.std_in = stdinFile;
    jd.std_err = const_cast<char *>(stderrFile.c_str());
    jd.std_out = const_cast<char *>(stdoutFile.c_str());
    jd.work_dir = const_cast<char *>(workDir.c_str());
    // jd.env_size = 1;
    // jd.environment = env;
    jd.env_size = envp.size();
    jd.environment = envp.data();
    error_code = slurm_submit_batch_job(&jd, &resp_msg);
    if (error_code) {
        std::stringstream ss;
        ss << "Unable to submit slurm job: " << slurm_strerror(error_code);
        throw std::runtime_error(ss.str());
    }
    uint32_t jobID = resp_msg->job_id;
    // NOTE(review): executorLog is written here but never stored in the Job
    // record below, so this message is lost — confirm whether it should be
    // carried into the AttemptRecord.
    executorLog << "Job " << resp_msg->job_submit_user_msg << '\n';
    slurm_free_submit_response_response_msg(resp_msg);
    // Register the job so the monitor thread will resolve its future.
    std::lock_guard<std::mutex> lock(promiseGuard_);
    Job newJob{.fut = std::make_shared<Future<AttemptRecord>>(),
               .stdoutFile = stdoutFile,
               .stderrFile = stderrFile,
               .runID = runID,
               .taskName = taskName};
    auto fut = newJob.fut;
    runningJobs_.emplace(jobID, std::move(newJob));
    return fut;
}
bool SlurmTaskExecutor::stop(DAGRunID runID, const std::string &taskName)
{
    // Stopping a task should be rare, so a linear scan over the running
    // jobs is acceptable.
    size_t jobID = 0;
    {
        std::lock_guard<std::mutex> lock(promiseGuard_);
        const auto match = std::find_if(
            runningJobs_.begin(), runningJobs_.end(), [&](const auto &kv) {
                return kv.second.runID == runID and
                       kv.second.taskName == taskName;
            });
        if (match != runningJobs_.end())
            jobID = match->first;
    }
    if (jobID == 0)
        return true; // task is not running; nothing to do

    // Ask slurm to kill the job immediately. The monitor thread will observe
    // the cancellation and resolve the task's future.
    slurm_kill_job(jobID, SIGKILL, KILL_HURRY);
    return true;
}
/// Background loop: polls slurm once per second for the state of every
/// submitted job and resolves the corresponding future once a job reaches a
/// terminal state. Runs until running_ is cleared by the destructor.
void SlurmTaskExecutor::monitor()
{
    std::unordered_set<size_t> resolvedJobs;
    while (running_) {
        {
            std::lock_guard<std::mutex> lock(promiseGuard_);
            // BUGFIX: resolvedJobs was never cleared between polling
            // iterations, so it grew without bound for the life of the
            // executor.
            resolvedJobs.clear();
            for (auto &[jobID, job] : runningJobs_) {
                job_info_msg_t *jobStatus;
                int error_code =
                    slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL);
                if (error_code != SLURM_SUCCESS)
                    continue; // transient failure; retry on the next poll
                uint32_t idx = jobStatus->record_count;
                if (idx == 0) {
                    // BUGFIX: this path previously leaked jobStatus.
                    slurm_free_job_info_msg(jobStatus);
                    continue;
                }
                idx--;
                const slurm_job_info_t &jobInfo = jobStatus->job_array[idx];
                AttemptRecord record;
                bool finished = true;
                switch (jobInfo.job_state) {
                case JOB_PENDING:
                case JOB_SUSPENDED:
                case JOB_RUNNING:
                    // BUGFIX: the original code used `continue` here, which
                    // skipped slurm_free_job_info_msg and leaked the status
                    // message once per second for every active job.
                    finished = false;
                    break;
                // Job has finished
                case JOB_COMPLETE: /* completed execution successfully */
                    record.rc = jobInfo.exit_code;
                    break;
                case JOB_FAILED: /* completed execution unsuccessfully */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Script errored.\n";
                    break;
                case JOB_CANCELLED: /* cancelled by user */
                    record.rc = 9; // matches SIGKILL
                    record.executorLog = "Job cancelled by user.\n";
                    break;
                case JOB_TIMEOUT: /* terminated on reaching time limit */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Job exceeded time limit.\n";
                    break;
                case JOB_NODE_FAIL: /* terminated on node failure */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Node failed during execution\n";
                    break;
                case JOB_PREEMPTED: /* terminated due to preemption */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Job terminated due to pre-emption.\n";
                    break;
                case JOB_BOOT_FAIL: /* terminated due to node boot failure */
                    record.rc = jobInfo.exit_code;
                    record.executorLog =
                        "Job failed to run due to failure of compute node to "
                        "boot.\n";
                    break;
                case JOB_DEADLINE: /* terminated on deadline */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Job terminated due to deadline.\n";
                    break;
                case JOB_OOM: /* experienced out of memory error */
                    record.rc = jobInfo.exit_code;
                    record.executorLog = "Job terminated due to out-of-memory.\n";
                    break;
                default:
                    // Unknown terminal state: resolve with the default
                    // record, matching the original fall-through behavior.
                    break;
                }
                // All reads of jobInfo are done; release the status message
                // on every path.
                slurm_free_job_info_msg(jobStatus);
                if (!finished)
                    continue;
                // Collect the job's output and delete the temp files, then
                // resolve the future.
                readAndClean(job.stdoutFile, record.outputLog);
                readAndClean(job.stderrFile, record.errorLog);
                job.fut->set(std::move(record));
                resolvedJobs.insert(jobID);
            }
            // Erase resolved entries only after the iteration is complete to
            // avoid invalidating the loop above.
            for (const auto &jobID : resolvedJobs) {
                runningJobs_.extract(jobID);
            }
        }
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
}
} // namespace daggy::executors::task
#endif