diff --git a/daggy/include/daggy/Utilities.hpp b/daggy/include/daggy/Utilities.hpp
index 9d8af8f..aba7bef 100644
--- a/daggy/include/daggy/Utilities.hpp
+++ b/daggy/include/daggy/Utilities.hpp
@@ -14,6 +14,10 @@
 #include "DAG.hpp"
 
 namespace daggy {
+    // Returns a copy of `string` with every occurrence of `pattern` replaced
+    // by `replacement`; an empty `pattern` leaves the input unchanged.
+    std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement);
+
     std::vector<std::vector<std::string>> expandCommands(const std::vector<std::string> &command, const ParameterValues &parameters);
 
     DAG buildDAGFromTasks(const std::vector &tasks);
diff --git a/daggy/src/Utilities.cpp b/daggy/src/Utilities.cpp
index 62a8919..8bd00fe 100644
--- a/daggy/src/Utilities.cpp
+++ b/daggy/src/Utilities.cpp
@@ -3,28 +3,58 @@
 #include
 
 namespace daggy {
+    // Returns a copy of `string` with every occurrence of `pattern` replaced by
+    // `replacement`. Text inserted by a replacement is never scanned again.
+    std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement) {
+        // An empty pattern matches at every position, so the loop could never finish.
+        if (pattern.empty()) return string;
+
+        size_t pos = string.find(pattern);
+        while (pos != std::string::npos) {
+            string.replace(pos, pattern.size(), replacement);
+            // Resume after the inserted text: a replacement that contains the
+            // pattern (e.g. "a" -> "aa") must not be substituted again.
+            pos = string.find(pattern, pos + replacement.size());
+        }
+        return string;
+    }
+
+    // Expands `command` into every concrete command obtained by substituting
+    // the parameters named inside its parts; list-valued parameters fan out.
     std::vector<std::vector<std::string>>
     expandCommands(const std::vector<std::string> &command, const ParameterValues &parameters) {
         std::vector<std::vector<std::string>> commands{{}};
 
         for (const auto &part : command) {
-            // this isn't an interpolated value
-            if (parameters.find(part) == parameters.end()) {
-                for (auto &cmd : commands) cmd.push_back(part);
-                continue;
-            }
-            auto &inVal = parameters.at(part);
-            if (std::holds_alternative<std::string>(inVal)) {
-                for (auto &cmd : commands) cmd.push_back(std::get<std::string>(inVal));
-                continue;
+            // Seed with the literal part so that a part referencing no
+            // parameter is carried through unchanged.
+            std::vector<std::string> expandedPart{part};
+
+            for (const auto &[param, paramValue] : parameters) {
+                auto pos = part.find(param);
+                if (pos == std::string::npos) continue;
+                std::vector<std::string> newExpandedPart;
+
+                if (std::holds_alternative<std::string>(paramValue)) {
+                    for (const auto &cmd : expandedPart) {
+                        newExpandedPart.push_back(globalSub(cmd, param, std::get<std::string>(paramValue)));
+                    }
+                } else {
+                    for (const auto &val : std::get<std::vector<std::string>>(paramValue)) {
+                        for (const auto &cmd : expandedPart) {
+                            newExpandedPart.push_back(globalSub(cmd, param, val));
+                        }
+                    }
+                }
+
+                expandedPart.swap(newExpandedPart);
             }
 
-            // Ends up being expensive, as it's a cartesian product
             std::vector<std::vector<std::string>> newCommands;
-            for (const auto &val : std::get<std::vector<std::string>>(inVal)) {
+            for (const auto &newPart : expandedPart) {
                 for (auto cmd : commands) {
-                    cmd.push_back(val);
-                    newCommands.push_back(cmd);
+                    cmd.push_back(newPart);
+                    newCommands.emplace_back(cmd);
                 }
             }
             commands.swap(newCommands);
@@ -136,4 +166,4 @@ namespace daggy {
         os << std::put_time(std::localtime(&t_c), "%Y-%m-%d %H:%M:%S %Z");
         return os;
     }
-}
\ No newline at end of file
+}
diff --git a/tests/unit_utilities.cpp b/tests/unit_utilities.cpp
index c5b9329..e1fc3eb 100644
--- a/tests/unit_utilities.cpp
+++ b/tests/unit_utilities.cpp
@@ -2,6 +2,10 @@
 #include
 #include
 
+#include
+#include
+#include
+
 #include
 
 #include "daggy/Utilities.hpp"
@@ -9,6 +13,16 @@
 #include "daggy/executors/task/ForkingTaskExecutor.hpp"
 #include "daggy/loggers/dag_run/OStreamLogger.hpp"
 
+TEST_CASE("String Utilities", "[utilities_string]") {
+    std::string test = "/this/is/{{A}}/test/{{A}}";
+    auto res = daggy::globalSub(test, "{{A}}", "hello");
+    REQUIRE(res == "/this/is/hello/test/hello");
+
+    // A replacement that contains the pattern, and an empty pattern, must both terminate.
+    REQUIRE(daggy::globalSub("a-a", "a", "aa") == "aa-aa");
+    REQUIRE(daggy::globalSub("abc", "", "x") == "abc");
+}
+
 TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
     SECTION("Basic expansion") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
@@ -28,6 +42,22 @@ TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
         // TYPE isn't used, so it's just |DATE| * |SOURCE|
         REQUIRE(allCommands.size() == 2);
     }
+
+    SECTION("Expand within a command part") {
+        std::string testParams{
+                R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
+        auto params = daggy::parametersFromJSON(testParams);
+        std::vector<std::string> cmd{"/usr/bin/touch", "/tmp/{{DATE}}_{{SOURCE}}"};
+        auto result = daggy::expandCommands(cmd, params);
+
+        // TYPE isn't used, so it's just |DATE| * |SOURCE|
+        REQUIRE(result.size() == 4);
+
+        for (const auto &command : result) {
+            std::copy(command.begin(), command.end(), std::ostream_iterator<std::string>(std::cout, " "));
+            std::cout << std::endl;
+        }
+    }
 }
 
 TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
@@ -41,4 +71,4 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
     auto runID = logger.startDAGRun("test_run", tasks);
 
     daggy::runDAG(runID, tasks, ex, logger, dag);
-}
\ No newline at end of file
+}