From c0315b4f0bfdb419c59dd379e339b5c40729e7d3 Mon Sep 17 00:00:00 2001 From: Ian Roddis Date: Fri, 12 Nov 2021 16:08:57 -0400 Subject: [PATCH] Changing up variable interpolation in preparation of supporting interpolation for environments --- libdaggy/include/daggy/Utilities.hpp | 6 ++ libdaggy/src/Utilities.cpp | 95 ++++++++++++++++++---------- libdaggy/tests/unit_utilities.cpp | 54 +++++++++++++++- 3 files changed, 121 insertions(+), 34 deletions(-) diff --git a/libdaggy/include/daggy/Utilities.hpp b/libdaggy/include/daggy/Utilities.hpp index 7207615..f852fb1 100644 --- a/libdaggy/include/daggy/Utilities.hpp +++ b/libdaggy/include/daggy/Utilities.hpp @@ -18,6 +18,12 @@ namespace daggy { std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement); + std::unordered_set matchingParameters( + const std::vector &input, const ConfigValues &values); + + std::vector> + generateCartesianValues(const ConfigValues &values); + std::vector interpolateValues(const std::vector &raw, const ConfigValues &values); diff --git a/libdaggy/src/Utilities.cpp b/libdaggy/src/Utilities.cpp index aca020a..b20cf36 100644 --- a/libdaggy/src/Utilities.cpp +++ b/libdaggy/src/Utilities.cpp @@ -17,48 +17,77 @@ namespace daggy { return string; } + std::vector> + generateCartesianValues(const ConfigValues &values) + { + using ResultType = + std::vector>; + ResultType result{{}}; + + for (const auto &[k, v] : values) { + if (std::holds_alternative(v)) { + for (auto &valset : result) { + valset.emplace(k, std::get(v)); + } + } + else { + ResultType new_result; + for (const auto &val : std::get>(v)) { + for (auto valset : result) { + valset.emplace(k, val); + new_result.emplace_back(valset); + } + } + result.swap(new_result); + } + } + return result; + } + + std::unordered_set matchingParameters( + const std::vector &input, const ConfigValues &values) + { + std::unordered_set matchParams; + for (const auto &[k, v] : values) { + std::string pattern = "{{" + k + "}}"; + bool anyMatched = + std::any_of(input.begin(), input.end(), [&](const auto &part) { + return part.find(pattern) != std::string::npos; + }); + if (anyMatched) + matchParams.insert(k); + } + return matchParams; + } + std::vector> interpolateValues( const std::vector &raw, const ConfigValues &values) { - std::vector> cooked{{}}; + std::vector> cooked; - for (const auto &part : raw) { - std::vector expandedPart{part}; + auto matchParams = matchingParameters(raw, values); + if (matchParams.empty()) { + cooked.emplace_back(raw); + return cooked; + } - // Find all values of parameters, and expand them - for (const auto &[paramRaw, paramValue] : values) { - std::string param = "{{" + paramRaw + "}}"; - auto pos = part.find(param); - if (pos == std::string::npos) - continue; - std::vector newExpandedPart; + ConfigValues paramSubset; + for (const auto &[k, v] : values) { + if (matchParams.count(k) == 0) + continue; + paramSubset.emplace(k, v); + } - if (std::holds_alternative(paramValue)) { - for (auto &cmd : expandedPart) { - newExpandedPart.push_back( - globalSub(cmd, param, std::get(paramValue))); - } - } - else { - for (const auto &val : - std::get>(paramValue)) { - for (const auto &cmd : expandedPart) { - newExpandedPart.push_back(globalSub(cmd, param, val)); - } - } - } + const auto valueSets = generateCartesianValues(paramSubset); - expandedPart.swap(newExpandedPart); - } - - std::vector> newCommands; - for (const auto &newPart : expandedPart) { - for (auto cmd : cooked) { - cmd.push_back(newPart); - newCommands.emplace_back(cmd); + for (const auto &valueSet : valueSets) { + std::vector item(raw); + for (auto &part : item) { + for (const auto &[k, v] : valueSet) { + part = globalSub(part, "{{" + k + "}}", v); } } - cooked.swap(newCommands); + cooked.emplace_back(item); } return cooked; } diff --git a/libdaggy/tests/unit_utilities.cpp b/libdaggy/tests/unit_utilities.cpp index ddb6ae5..4c22a8f 100644 --- a/libdaggy/tests/unit_utilities.cpp +++ b/libdaggy/tests/unit_utilities.cpp @@ -8,6 +8,7 @@ #include "daggy/Serialization.hpp" #include "daggy/Utilities.hpp" +#include "daggy/executors/task/NoopTaskExecutor.hpp" TEST_CASE("string_utilities", "[utilities_string]") { @@ -16,7 +17,24 @@ TEST_CASE("string_utilities", "[utilities_string]") REQUIRE(res == "/this/is/hello/test/hello"); } -TEST_CASE("string_expansion", "[utilities_parameter_expansion]") +TEST_CASE("generate_cart_values", "[cartesian_values]") +{ + std::string testParams{ + R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; + auto params = daggy::configFromJSON(testParams); + + auto result = daggy::generateCartesianValues(params); + REQUIRE(result.size() == 6); + + for (const auto& valset : result) { + REQUIRE(valset.size() == 3); + REQUIRE(valset.count("DATE") == 1); + REQUIRE(valset.count("SOURCE") == 1); + REQUIRE(valset.count("TYPE") == 1); + } +} + +TEST_CASE("string_expansion", "[utilities][parameter_expansion]") { SECTION("Basic expansion") { @@ -30,6 +48,17 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") REQUIRE(allCommands.size() == 6); } + SECTION("No expansion") + { + std::string testParams{ + R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; + auto params = daggy::configFromJSON(testParams); + std::vector cmd{"/usr/bin/echo"}; + auto allCommands = daggy::interpolateValues(cmd, params); + + REQUIRE(allCommands.size() == 1); + } + SECTION("Skip over unused parameters") { std::string testParams{ @@ -54,3 +83,26 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") REQUIRE(result.size() == 4); } } + +TEST_CASE("expand_taskset", "[utilities][expand_taskset]") +{ + daggy::executors::task::NoopTaskExecutor ex; + daggy::DAGSpec dagSpec; + std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; + dagSpec.taskConfig.variables = daggy::configFromJSON(testParams); + + std::stringstream jsonTasks; + jsonTasks + << R"({ "A": { "job": {"command": [ "/usr/bin/cat", "/etc/passwd"]} )" + << R"(, "children": ["C"], "isGenerator": true},)" + << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })"; + + dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str()); + REQUIRE(dagSpec.tasks.size() == 2); + REQUIRE(dagSpec.tasks["A"].children == std::unordered_set{"C"}); + dagSpec.tasks = + daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables); + REQUIRE(dagSpec.tasks.size() == 2); + REQUIRE(dagSpec.tasks["A_0"].children == + std::unordered_set{"C"}); +}