#include <iomanip>
#include <iterator>
#include <mutex>
#include <stdexcept>
#ifdef DAGGY_ENABLE_SLURM
// NOTE(review): the include targets in this file were lost in extraction and
// have been reconstructed from what the code below uses. The standard and
// system headers are certain; the two project header paths at the end are
// assumed from the namespace and from the names used here (interpolateValues,
// SlurmTaskExecutor) -- confirm against the repository layout.
#include <slurm/slurm.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <array>
#include <cerrno>
#include <chrono>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>

#include <daggy/Utilities.hpp>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>

namespace fs = std::filesystem;

namespace daggy::executors::task {
  // Returns a random tag of `nChars` characters drawn from [0-9a-zA-Z].
  // Used to make per-attempt output file names unique.
  std::string getUniqueTag(size_t nChars = 6)
  {
    std::string result(nChars, '\0');
    static std::random_device dev;
    static std::mt19937 rng(dev());

    // 62 symbols in the alphabet, so valid indices are 0..61 inclusive.
    std::uniform_int_distribution<int> dist(0, 61);

    const char *v =
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    for (size_t i = 0; i < nChars; i++) {
      result[i] = v[dist(rng)];
    }
    return result;
  }

  // Reads the entire contents of `fn` into `dest` and then deletes `fn`.
  // If `fn` does not exist, `dest` is left untouched.
  void readAndClean(const fs::path &fn, std::string &dest)
  {
    if (!fs::exists(fn))
      return;

    std::ifstream ifh;
    ifh.open(fn);
    std::string contents(std::istreambuf_iterator<char>{ifh}, {});
    ifh.close();
    fs::remove_all(fn);

    dest.swap(contents);
  }

  // Starts the monitor thread and exports the SLURM_* environment variables
  // (priority, submit directory, submit host, umask) describing this process.
  SlurmTaskExecutor::SlurmTaskExecutor()
    : running_(true)
    , monitorWorker_(&SlurmTaskExecutor::monitor, this)
  {
    const std::string priority =
        std::to_string(getpriority(PRIO_PROCESS, 0));
    const std::string submitDir = fs::current_path().string();

    // Zero-initialised and one byte larger than the length handed to
    // gethostname(), so the buffer is always NUL-terminated even if the name
    // is truncated (or the call fails, in which case the host is empty).
    std::array<char, 256> hostBuf{};
    gethostname(hostBuf.data(), hostBuf.size() - 1);
    const std::string submitHost(hostBuf.data());

    // umask() can only be read by setting it, so set and immediately restore.
    const uint32_t mask = umask(0);
    umask(mask);

    // Rendered as a leading 0 followed by the three octal permission digits.
    std::string umaskValue = "0";
    umaskValue += std::to_string((mask >> 6) & 07u);
    umaskValue += std::to_string((mask >> 3) & 07u);
    umaskValue += std::to_string(mask & 07u);

    // setenv() copies its arguments. putenv() would instead keep pointers to
    // these locals in the environment, which dangle once the constructor
    // returns.
    setenv("SLURM_PRIO_PROCESS", priority.c_str(), 1);
    setenv("SLURM_SUBMIT_DIR", submitDir.c_str(), 1);
    setenv("SLURM_SUBMIT_HOST", submitHost.c_str(), 1);
    setenv("SLURM_UMASK", umaskValue.c_str(), 1);
  }

  // Stops the monitor thread, then resolves every still-tracked future with
  // rc = -1 so that nobody is left waiting on a job that is no longer watched.
  SlurmTaskExecutor::~SlurmTaskExecutor()
  {
    running_ = false;
    monitorWorker_.join();

    // Resolve the remaining futures
    std::lock_guard lock(promiseGuard_);
    for (auto &[jobID, job] : runningJobs_) {
      job.fut->set(AttemptRecord{.rc = -1, .executorLog = "executor killed"});
    }
    runningJobs_.clear();
  }

  // Human-readable name of this executor.
  std::string SlurmTaskExecutor::description() const
  {
    return "SlurmTaskExecutor";
  }

  // Validates the job to ensure that all required values are set and are of
  // the right type.
  //
  // Returns true when the job is valid; throws std::runtime_error describing
  // the first problem found otherwise.
  bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job)
  {
    const std::unordered_set<std::string> requiredFields{
        "minCPUs",          "minMemoryMB", "minTmpDiskMB", "priority",
        "timeLimitSeconds", "userID",      "workDir",      "tmpDir"};

    for (const auto &requiredField : requiredFields) {
      if (job.count(requiredField) == 0) {
        throw std::runtime_error("Missing field " + requiredField);
      }
    }

    // Require command or commandString
    if (job.count("command") + job.count("commandString") == 0)
      throw std::runtime_error(
          "Either command or commandString must be specified");

    if (job.count("environment")) {
      if (!std::holds_alternative<Command>(job.at("environment")))
        throw std::runtime_error(R"(environment must be an array of strings)");
    }

    return true;
  }

  // Produces one job per expansion of `expansionValues`.
  //
  // "command" and "environment" are concatenated and interpolated together,
  // then split back apart at the original command length, so each resulting
  // job carries a matching command / environment pair.
  std::vector<ConfigValues> SlurmTaskExecutor::expandTaskParameters(
      const ConfigValues &job, const ConfigValues &expansionValues)
  {
    std::vector<ConfigValues> newValues;

    auto command =
        (job.count("command") == 0 ? Command{}
                                   : std::get<Command>(job.at("command")));

    auto environment = (job.count("environment") == 0
                            ? Command{}
                            : std::get<Command>(job.at("environment")));

    Command both(command);
    std::copy(environment.begin(), environment.end(),
              std::back_inserter(both));

    for (const auto &parts : interpolateValues(both, expansionValues)) {
      ConfigValues newCommand{job};
      newCommand["command"] =
          Command(parts.begin(), parts.begin() + command.size());
      newCommand["environment"] =
          Command(parts.begin() + command.size(), parts.end());
      newValues.emplace_back(newCommand);
    }

    return newValues;
  }

  // Submits `task` to Slurm as a batch job and registers it with the monitor.
  //
  // stdout / stderr are redirected to uniquely named files under the job's
  // "tmpDir"; the monitor reads and deletes them once the job finishes.
  //
  // Returns the future that the monitor resolves with the attempt's result.
  // Throws std::runtime_error if Slurm rejects the submission, and whatever
  // std::get / std::stoi throw if a job field is missing or malformed.
  TaskFuture SlurmTaskExecutor::execute(DAGRunID runID,
                                        const std::string &taskName,
                                        const Task &task)
  {
    const auto &job = task.job;
    const auto uniqueTaskName = taskName + "_" + getUniqueTag(6);

    const fs::path tmpDir = std::get<std::string>(job.at("tmpDir"));
    std::string stdoutFile = (tmpDir / (uniqueTaskName + ".stdout")).string();
    std::string stderrFile = (tmpDir / (uniqueTaskName + ".stderr")).string();
    std::string workDir = std::get<std::string>(job.at("workDir"));

    // Populate the command: "commandString" (split on whitespace, honouring
    // quotes) takes precedence over the pre-split "command" array.
    Command command;
    if (job.count("commandString")) {
      std::stringstream ss;
      ss << std::get<std::string>(job.at("commandString"));
      std::string tok;
      while (ss >> std::quoted(tok)) {
        command.push_back(tok);
      }
    }
    else {
      command = std::get<Command>(job.at("command"));
    }

    // Convert command to argc / argv. The pointers refer into `command`,
    // which must therefore stay alive and unmodified until submission.
    // NOTE(review): the leading nullptr (slot 0) and the trailing nullptr
    // being counted in argc are preserved from the original -- presumably
    // slot 0 is a placeholder for the script name; confirm.
    std::vector<char *> argv{nullptr};
    argv.reserve(command.size() + 2);
    std::transform(
        command.begin(), command.end(), std::back_inserter(argv),
        [](const std::string &s) { return const_cast<char *>(s.c_str()); });
    argv.push_back(nullptr);

    // The environment always starts with one empty entry, followed by the
    // job's own "environment" values when present.
    std::vector<std::string> env{""};
    if (auto it = job.find("environment"); it != job.end()) {
      const auto &environment = std::get<Command>(it->second);
      env.insert(env.end(), environment.begin(), environment.end());
    }
    std::vector<char *> envp;
    envp.reserve(env.size());
    std::transform(
        env.begin(), env.end(), std::back_inserter(envp),
        [](const std::string &s) { return const_cast<char *>(s.c_str()); });

    // Trivial wrapper script: run whatever was passed as arguments.
    char script[] = "#!/bin/bash\n$@\n";
    char stdinFile[] = "/dev/null";

    job_desc_msg_t jd;
    slurm_init_job_desc_msg(&jd);
    jd.contiguous = 1;
    jd.name = const_cast<char *>(taskName.c_str());
    jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs")));
    jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB")));
    jd.pn_min_tmp_disk =
        std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
    jd.priority = std::stoi(std::get<std::string>(job.at("priority")));
    jd.shared = 0;

    // Slurm's time_limit is expressed in minutes, while the job field is in
    // seconds: convert, rounding up so a job is never given less than asked.
    const int limitSeconds =
        std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
    jd.time_limit =
        limitSeconds > 0 ? limitSeconds / 60 + (limitSeconds % 60 != 0 ? 1 : 0)
                         : limitSeconds;

    jd.min_nodes = 1;
    jd.user_id = std::stoi(std::get<std::string>(job.at("userID")));
    jd.argv = argv.data();
    jd.argc = static_cast<uint32_t>(argv.size());
    // TODO figure out the script to run
    jd.script = script;
    jd.std_in = stdinFile;
    jd.std_err = const_cast<char *>(stderrFile.c_str());
    jd.std_out = const_cast<char *>(stdoutFile.c_str());
    jd.work_dir = const_cast<char *>(workDir.c_str());
    jd.env_size = static_cast<uint32_t>(envp.size());
    jd.environment = envp.data();

    submit_response_msg_t *resp_msg = nullptr;
    const int error_code = slurm_submit_batch_job(&jd, &resp_msg);
    if (error_code != SLURM_SUCCESS) {
      // On failure the call returns SLURM_ERROR (-1) and reports the actual
      // reason through errno, so that is what must be turned into a message.
      const int slurmErrno = (error_code == SLURM_ERROR) ? errno : error_code;
      std::stringstream ss;
      ss << "Unable to submit slurm job: " << slurm_strerror(slurmErrno);
      throw std::runtime_error(ss.str());
    }
    if (resp_msg == nullptr) {
      throw std::runtime_error(
          "Unable to submit slurm job: no response received");
    }

    const uint32_t jobID = resp_msg->job_id;
    slurm_free_submit_response_response_msg(resp_msg);

    std::lock_guard lock(promiseGuard_);
    // NOTE(review): assumes TaskFuture is a std::shared_ptr alias, as the
    // original make_shared call implies -- confirm.
    Job newJob{.fut = std::make_shared<TaskFuture::element_type>(),
               .stdoutFile = stdoutFile,
               .stderrFile = stderrFile,
               .runID = runID,
               .taskName = taskName};
    auto fut = newJob.fut;
    runningJobs_.emplace(jobID, std::move(newJob));

    return fut;
  }

  // Asks Slurm to kill the job belonging to (`runID`, `taskName`), if one is
  // being tracked. Always returns true; the job's future is resolved later by
  // the monitor once Slurm reports the job's final state.
  bool SlurmTaskExecutor::stop(DAGRunID runID, const std::string &taskName)
  {
    // Hopefully this isn't a common thing, so just scan the current jobs and
    // kill the matching one. 0 doubles as "not found".
    size_t jobID = 0;
    {
      std::lock_guard lock(promiseGuard_);
      for (const auto &[k, v] : runningJobs_) {
        if (v.runID == runID && v.taskName == taskName) {
          jobID = k;
          break;
        }
      }
      if (jobID == 0)
        return true;
    }

    // Send the kill message to slurm (outside the lock)
    slurm_kill_job(jobID, SIGKILL, KILL_HURRY);

    return true;
  }

  // Body of the monitor thread: once a second, polls Slurm for every tracked
  // job; when a job has reached a final state, collects its output files,
  // resolves its future and stops tracking it.
  void SlurmTaskExecutor::monitor()
  {
    // Owns a job_info_msg_t so that it is released on every path out of an
    // iteration, including the early `continue`s.
    using JobInfoPtr =
        std::unique_ptr<job_info_msg_t, decltype(&slurm_free_job_info_msg)>;

    while (running_) {
      {
        std::lock_guard lock(promiseGuard_);
        for (auto it = runningJobs_.begin(); it != runningJobs_.end();) {
          const auto jobID = it->first;
          auto &job = it->second;

          job_info_msg_t *rawStatus = nullptr;
          if (slurm_load_job(&rawStatus, jobID, SHOW_ALL | SHOW_DETAIL) !=
              SLURM_SUCCESS) {
            // Could not query this job right now; try again next round.
            ++it;
            continue;
          }
          const JobInfoPtr jobStatus(rawStatus, &slurm_free_job_info_msg);

          if (!jobStatus || jobStatus->record_count == 0) {
            ++it;
            continue;
          }

          // Use the last record returned for the job.
          const slurm_job_info_t &jobInfo =
              jobStatus->job_array[jobStatus->record_count - 1];

          // job_state combines a base state with optional flag bits; only
          // the base state says whether the job has finished.
          // NOTE(review): exit_code is forwarded unchanged; if it is a
          // wait()-style status rather than a plain exit code, callers may
          // want it decoded -- confirm.
          AttemptRecord record;
          bool finished = true;
          switch (jobInfo.job_state & JOB_STATE_BASE) {
            case JOB_PENDING:
            case JOB_SUSPENDED:
            case JOB_RUNNING:
              finished = false;
              break;
            case JOB_COMPLETE: /* completed execution successfully */
              record.rc = jobInfo.exit_code;
              break;
            case JOB_FAILED: /* completed execution unsuccessfully */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Script errored.\n";
              break;
            case JOB_CANCELLED:  /* cancelled by user */
              record.rc = 9;     // matches SIGKILL
              record.executorLog = "Job cancelled by user.\n";
              break;
            case JOB_TIMEOUT: /* terminated on reaching time limit */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Job exceeded time limit.\n";
              break;
            case JOB_NODE_FAIL: /* terminated on node failure */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Node failed during execution\n";
              break;
            case JOB_PREEMPTED: /* terminated due to preemption */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Job terminated due to pre-emption.\n";
              break;
            case JOB_BOOT_FAIL: /* terminated due to node boot failure */
              record.rc = jobInfo.exit_code;
              record.executorLog =
                  "Job failed to run due to failure of compute node to "
                  "boot.\n";
              break;
            case JOB_DEADLINE: /* terminated on deadline */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Job terminated due to deadline.\n";
              break;
            case JOB_OOM: /* experienced out of memory error */
              record.rc = jobInfo.exit_code;
              record.executorLog = "Job terminated due to out-of-memory.\n";
              break;
            default:
              // A base state this code does not know about: report it as a
              // failure rather than resolving with an unset record.
              record.rc = -1;
              record.executorLog = "Job ended in an unrecognised state.\n";
              break;
          }

          if (!finished) {
            ++it;
            continue;
          }

          readAndClean(job.stdoutFile, record.outputLog);
          readAndClean(job.stderrFile, record.errorLog);

          job.fut->set(std::move(record));

          // Stop tracking the job; erase() hands back the next element.
          it = runningJobs_.erase(it);
        }
      }

      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
  }
}  // namespace daggy::executors::task
#endif