Massive re-org to allow per-executor job specification formats and executor-specific task validation and expansion.
A few renames to try to keep things more consistent.
README.md (166 changed lines)
@@ -58,7 +58,7 @@ DAG Run Definition
 daggy works as a standalone library, but generally runs as a service with a REST interface. This documentation is
 specifically for submitting DAGs to the REST server for execution (a DAG run).
 
-DAGs are defined in JSON as a set of `tasks`, along with optional `taskParameters` and `executionParameters` (future).
+DAGs are defined in JSON as a set of `tasks`, along with optional `parameters` and `executionParameters` (future).
 
 Basic Definition
 --
@@ -84,18 +84,22 @@ Below is an example DAG Run submission:
 {
   "tasks": {
     "task_one": {
-      "command": [
-        "/usr/bin/touch",
-        "/tmp/somefile"
-      ],
+      "job": {
+        "command": [
+          "/usr/bin/touch",
+          "/tmp/somefile"
+        ]
+      },
       "maxRetries": 3,
       "retryIntervalSeconds": 30
     },
     "task_two": {
-      "command": [
-        "/usr/bin/touch",
-        "/tmp/someotherfile"
-      ],
+      "job": {
+        "command": [
+          "/usr/bin/touch",
+          "/tmp/someotherfile"
+        ]
+      },
       "maxRetries": 3,
       "retryIntervalSeconds": 30,
       "parents": [
@@ -109,24 +113,25 @@ Below is an example DAG Run submission:
 Task Parameters
 --
 
-Task commands can be parameterized by passing in an optional `taskParameters` member. Each parameter consists of a name
-and either a string value, or an array of string values. Task commands will be regenerated based on the values of the
-parameters.
+Task commands can be parameterized by passing in an optional `parameters` member. Each parameter consists of a name and
+either a string value, or an array of string values. Tasks will be regenerated based on the values of the parameters.
 
 For instance:
 
 ```json
 {
-  "taskParameters": {
+  "parameters": {
     "DIRECTORY": "/var/tmp",
     "FILE": "somefile"
   },
   "tasks": {
     "task_one": {
-      "command": [
-        "/usr/bin/touch",
-        "{{DIRECTORY}}/{{FILE}}"
-      ],
+      "job": {
+        "command": [
+          "/usr/bin/touch",
+          "{{DIRECTORY}}/{{FILE}}"
+        ]
+      },
       "maxRetries": 3,
       "retryIntervalSeconds": 30
     }
@@ -135,7 +140,7 @@ For instance:
 ```
 
 `task_one`'s command, when run, will touch `/var/tmp/somefile`, since the values of `DIRECTORY` and `FILE` will be
-populated from the `taskParameters` values.
+populated from the `parameters` values.
 
 In the case where a parameter has an array of values, any tasks referencing that value will be duplicated with the
 cartesian product of all relevant values.
@@ -144,7 +149,7 @@ Example:
 
 ```json
 {
-  "taskParameters": {
+  "parameters": {
     "DIRECTORY": "/var/tmp",
     "FILE": "somefile",
     "DATE": [
@@ -155,22 +160,28 @@ Example:
   },
   "tasks": {
     "populate_inputs": {
-      "command": [
-        "/usr/bin/touch",
-        "{{DIRECTORY}}/{{FILE}}"
-      ]
+      "job": {
+        "command": [
+          "/usr/bin/touch",
+          "{{DIRECTORY}}/{{FILE}}"
+        ]
+      }
     },
     "calc_date": {
-      "command": [
-        "/path/to/calculator",
-        "{{DIRECTORY}}/{{FILE}}",
-        "{{DATE}}"
-      ]
+      "job": {
+        "command": [
+          "/path/to/calculator",
+          "{{DIRECTORY}}/{{FILE}}",
+          "{{DATE}}"
+        ]
+      }
     },
     "generate_report": {
-      "command": [
-        "/path/to/generator"
-      ]
+      "job": {
+        "command": [
+          "/path/to/generator"
+        ]
+      }
     }
   }
 }
@@ -200,37 +211,48 @@ graph LR
 - `calc_date_2` will have the command `/path/to/calculator /var/tmp/somefile 2021-02-01`
 - `calc_date_3` will have the command `/path/to/calculator /var/tmp/somefile 2021-03-01`
 
+**NB**: When a task template resolves to multiple task instances, all of those new instances are still referred to by
+the original name for the purposes of creating dependencies. e.g. to add a dependency dynamically (see next section),
+you must refer to `"children": [ "calc_date" ]`, not to the individual `calc_date_1`.
+
 Tasks Generating Tasks
 ----------------------
 
-Some DAG structures cannot be known ahead of time, but only at runtime. For instance, if a job pulls multiple files
-from a source, each of which can be processed independently, it would be nice if the DAG could modify itself on the fly
-to accomodate that request.
+Some DAG structures can only be fully known at runtime. For instance, if a job pulls multiple files from a source, each
+of which can be processed independently, it would be nice if the DAG could modify itself on the fly to accommodate that
+request.
 
-Enter the `generator` task. If a task is defined with `"isGenerator": true`, the output of the task is assumed to be
-a JSON dictionary containing new tasks to run. The new tasks will go through parameter expansion as described above,
-and can freely define their dependencies the same way.
+Enter the `generator` task. If a task is defined with `"isGenerator": true`, the output of the task is assumed to be a
+JSON dictionary containing new tasks to run. The new tasks will go through parameter expansion as described above, using
+the same parameter list as the original DAG. New tasks can define their own dependencies.
 
 **NB:** Generated tasks won't have any children dependencies unless you define them. If there are parameterized
 dependencies, you must use the name of the original task (e.g. use `calc_date`, not `calc_date_1`) to add a dependency.
 
-**NB:** If you add a child dependency to a task that has already completed, weird things will happen. Don't do it.
+**NB:** If you add a child dependency to a task that has already completed, that task won't restart. Best practice is to
+create a dependency from the generator task to the task the new tasks will depend on.
 
 ```json
 {
   "tasks": {
     "pull_files": {
-      "command": [
-        "/path/to/puller/script",
-        "{{DATE}}"
-      ],
+      "job": {
+        "command": [
+          "/path/to/puller/script",
+          "{{DATE}}"
+        ]
+      },
       "isGenerator": true,
-      "children": [ "generate_report" ]
+      "children": [
+        "generate_report"
+      ]
     },
     "generate_report": {
-      "command": [
-        "/path/to/generator"
-      ]
+      "job": {
+        "command": [
+          "/path/to/generator"
+        ]
+      }
     }
   }
 }
@@ -245,20 +267,29 @@ The output of the puller task might be:
 
 ```json
 {
   "calc_date_a": {
-    "command": [
-      "/path/to/calculator",
-      "/path/to/data/file/a"
-    ],
-    "children": ["generate_report"]
+    "job": {
+      "command": [
+        "/path/to/calculator",
+        "/path/to/data/file/a"
+      ]
+    },
+    "children": [
+      "generate_report"
+    ]
   },
   "calc_date_b": {
-    "command": [
-      "/path/to/calculator",
-      "/path/to/data/file/b"
-    ],
-    "children": ["generate_report"]
+    "job": {
+      "command": [
+        "/path/to/calculator",
+        "/path/to/data/file/b"
+      ]
+    },
+    "children": [
+      "generate_report"
+    ]
   }
 }
 ```
 
@@ -272,8 +303,9 @@ graph LR
     calc_file_a-->generate_report
     calc_file_b-->generate_report
 ```
-Note that it was important that `generate_report` depend on `pull_files`, otherwise the two task would
-run concurrently, and the `generate_report` wouldn't have any files to report on.
+
+Note that it was important that `generate_report` depend on `pull_files`, otherwise the two tasks would run concurrently,
+and the `generate_report` wouldn't have any files to report on.
 
 Execution Parameters
 --
@@ -288,3 +320,15 @@ jobs on slurm with a specific set of restrictions, or allow for local execution
 |-----------|-------------|
 | pool | Names the executor the DAG should run on |
 | poolParameters | Any parameters the executor accepts that might modify how a task is run |
+
+Executors
+=========
+
+Different executors require different structures for the `job` task member.
+
+Default Job Values
+------------------
+
+A DAG can be submitted with the extra section `jobDefaults`. These values will be used to fill in default values for all
+tasks if they aren't overridden. This can be useful for cases like Slurm execution, where tasks will share default
+memory and runtime requirements.
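A concrete sketch of the `jobDefaults` section described above (the `runtime` and `memory` keys are illustrative, executor-specific values rather than part of this commit; a task's own `job` entries override the defaults):

```json
{
  "jobDefaults": {
    "runtime": "60",
    "memory": "300M"
  },
  "tasks": {
    "task_one": {
      "job": {
        "command": ["/usr/bin/touch", "/tmp/somefile"],
        "memory": "1G"
      }
    }
  }
}
```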
TODO.md (51 changed lines)
@@ -1,27 +1,30 @@
 Tasks
 ==
 
-- Open
-  - REST Server
-    - [ ] Add in authorization scheme (maybe PAM auth endpoint with JWT?)
-  - Core Functionality
-    - Allow `{{,!DATES}}` style interpolation, where values from `{{DATES}}` are preserved as a single, comma
-      separated list
-  - Executors
-    - [ ] Slurm Executor
-  - Loggers
-    - [ ] FileSystemLogger
-      - [ ] Add unit tests
-      - [ ] Add more error checking
-    - [ ] General logger
-    - [ ] Redis DAGRunLogger
-  - Server
-    - [ ] Multiple execution pools
-    - [ ] per-Executor parameters
-  - Utilities
-    - daggyd
-      - [ ] Add config file support
-      - [ ] Support for all the different executors / state loggers
-    - daggyc
-      - [ ] Submission
-      - [ ] Querying
+- REST Server
+  - [ ] Add in authorization scheme (maybe PAM auth endpoint with JWT?)
+- Core Functionality
+  - rename TaskList to TaskSet
+  - Add support for Task.runSpec
+    - Executors can validate runspecs
+    - Executors can expand runspecs
+  - Allow `{{,!DATES}}` style interpolation, where values from `{{DATES}}` are preserved as a single, comma
+    separated list
+- Executors
+  - [ ] Slurm Executor
+- Loggers
+  - [ ] FileSystemLogger
+    - [ ] Add unit tests
+    - [ ] Add more error checking
+  - [ ] General logger
+  - [ ] Redis DAGRunLogger
+- Server
+  - [ ] Multiple execution pools
+  - [ ] per-Executor parameters
+- Utilities
+  - daggyd
+    - [ ] Add config file support
+    - [ ] Support for all the different executors / state loggers
+  - daggyc
+    - [ ] Submission
+    - [ ] Querying
@@ -11,8 +11,8 @@
 
 namespace daggy {
     // Commands and parameters
-    using ParameterValue = std::variant<std::string, std::vector<std::string>>;
-    using ParameterValues = std::unordered_map<std::string, ParameterValue>;
+    using ConfigValue = std::variant<std::string, std::vector<std::string>>;
+    using ConfigValues = std::unordered_map<std::string, ConfigValue>;
     using Command = std::vector<std::string>;
 
     // Time
@@ -21,10 +21,9 @@ namespace daggy {
 
     // DAG Runs
     using DAGRunID = size_t;
-    using TaskID = size_t;
 
     BETTER_ENUM(RunState, uint32_t,
-                QUEUED = 1,
+                QUEUED = 1 << 0,
                 RUNNING = 1 << 1,
                 RETRY = 1 << 2,
                 ERRORED = 1 << 3,
@@ -34,25 +33,25 @@ namespace daggy {
 
     struct Task {
         std::string definedName;
-        std::vector<std::string> command;
+        bool isGenerator;      // True if the output of this task is a JSON set of tasks to complete
         uint32_t maxRetries;
         uint32_t retryIntervalSeconds;  // Time to wait between retries
+        ConfigValues job;      // It's up to the individual inspectors to convert values from strings / arrays of strings
         std::unordered_set<std::string> children;
         std::unordered_set<std::string> parents;
-        bool isGenerator;      // True if the output of this task is a JSON set of tasks to complete
 
         bool operator==(const Task &other) const {
             return (definedName == other.definedName)
                    and (maxRetries == other.maxRetries)
                    and (retryIntervalSeconds == other.retryIntervalSeconds)
-                   and (command == other.command)
+                   and (job == other.job)
                    and (children == other.children)
                    and (parents == other.parents)
                    and (isGenerator == other.isGenerator);
         }
     };
 
-    using TaskList = std::unordered_map<std::string, Task>;
+    using TaskSet = std::unordered_map<std::string, Task>;
 
     struct AttemptRecord {
         TimePoint startTime;
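For reference, a `Task` with the reworked layout can be built with designated initializers; this sketch mirrors the updated test fixtures later in this commit (the names and values are illustrative):

```cpp
// Sketch only: the command now lives in the ConfigValues job member.
daggy::Task task{
        .definedName = "work_a",
        .isGenerator = false,
        .maxRetries = 3,
        .retryIntervalSeconds = 30,
        .job = {{"command", std::vector<std::string>{"/bin/echo", "a"}}},
        .children = {"work_c"}
};
```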
@@ -12,22 +12,26 @@
 namespace rj = rapidjson;
 
 namespace daggy {
-    // Parameters
-    ParameterValues parametersFromJSON(const std::string &jsonSpec);
+    void checkRJParse(const rj::ParseResult &result, const std::string &prefix = "");
 
-    ParameterValues parametersFromJSON(const rj::Value &spec);
+    // Parameters
+    ConfigValues configFromJSON(const std::string &jsonSpec);
+
+    ConfigValues configFromJSON(const rj::Value &spec);
+
+    std::string configToJSON(const ConfigValues &config);
 
     // Tasks
-    TaskList
-    taskFromJSON(const std::string &name, const rj::Value &spec, const ParameterValues &parameters = {});
+    Task
+    taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults = {});
 
-    TaskList tasksFromJSON(const std::string &jsonSpec, const ParameterValues &parameters = {});
+    TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults = {});
 
-    TaskList tasksFromJSON(const rj::Value &spec, const ParameterValues &parameters = {});
+    TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults = {});
 
     std::string taskToJSON(const Task &task);
 
-    std::string tasksToJSON(const TaskList &tasks);
+    std::string tasksToJSON(const TaskSet &tasks);
 
     // Attempt Records
     std::string attemptRecordToJSON(const AttemptRecord &attemptRecord);
@@ -17,13 +17,19 @@ namespace daggy {
 
     std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement);
 
-    std::vector<Command> expandCommands(const std::vector<std::string> &command, const ParameterValues &parameters);
+    std::vector<Command> interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values);
+
+    TaskSet
+    expandTaskSet(const TaskSet &tasks,
+                  executors::task::TaskExecutor &executor,
+                  const ConfigValues &interpolatedValues = {});
 
     TaskDAG
-    buildDAGFromTasks(TaskList &tasks,
+    buildDAGFromTasks(TaskSet &tasks,
                       const std::vector<loggers::dag_run::TaskUpdateRecord> &updates = {});
 
-    void updateDAGFromTasks(TaskDAG &dag, TaskList &tasks);
+    void updateDAGFromTasks(TaskDAG &dag, TaskSet &tasks);
 
     // Blocking call
     std::vector<AttemptRecord>
@@ -37,7 +43,7 @@ namespace daggy {
             executors::task::TaskExecutor &executor,
             loggers::dag_run::DAGRunLogger &logger,
             TaskDAG dag,
-            const ParameterValues taskParameters = {});
+            const ConfigValues job = {});
 
     std::ostream &operator<<(std::ostream &os, const TimePoint &tp);
 }
@@ -2,18 +2,22 @@
 
 #include "TaskExecutor.hpp"
 
-namespace daggy {
-    namespace executors {
-        namespace task {
-            class ForkingTaskExecutor : public TaskExecutor {
-            public:
-                ForkingTaskExecutor(size_t nThreads)
-                        : TaskExecutor(nThreads) {}
-
-                const std::string getName() const override { return "ForkingTaskExecutor"; }
-
-                AttemptRecord runCommand(const Task &task) override;
-            };
-        }
-    }
-}
+namespace daggy::executors::task {
+    class ForkingTaskExecutor : public TaskExecutor {
+    public:
+        using Command = std::vector<std::string>;
+
+        ForkingTaskExecutor(size_t nThreads)
+                : TaskExecutor(nThreads) {}
+
+        // Validates the job to ensure that all required values are set and are of the right type,
+        bool validateTaskParameters(const ConfigValues &job) override;
+
+        std::vector<ConfigValues>
+        expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override;
+
+        // Runs the task
+        AttemptRecord execute(const Task &task) override;
+    };
+}
@@ -14,20 +14,21 @@
|
|||||||
If there are many retries, logs are returned for each attempt.
|
If there are many retries, logs are returned for each attempt.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
namespace daggy {
|
namespace daggy::executors::task {
|
||||||
namespace executors {
|
class TaskExecutor {
|
||||||
namespace task {
|
public:
|
||||||
class TaskExecutor {
|
TaskExecutor(size_t nThreads) : threadPool(nThreads) {};
|
||||||
public:
|
|
||||||
TaskExecutor(size_t nThreads) : threadPool(nThreads) {};
|
|
||||||
|
|
||||||
virtual const std::string getName() const = 0;
|
// Validates the job to ensure that all required values are set and are of the right type,
|
||||||
|
virtual bool validateTaskParameters(const ConfigValues &job) = 0;
|
||||||
|
|
||||||
// This will block if the dag_executor is full
|
// Will use the expansion values to return the fully expanded tasks.
|
||||||
virtual AttemptRecord runCommand(const Task &task) = 0;
|
virtual std::vector<ConfigValues>
|
||||||
|
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) = 0;
|
||||||
|
|
||||||
ThreadPool threadPool;
|
// Blocking execution of a task
|
||||||
};
|
virtual AttemptRecord execute(const Task &task) = 0;
|
||||||
}
|
|
||||||
}
|
ThreadPool threadPool;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
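The reshaped interface means every executor now owns validation, expansion, and execution of its own job format. A minimal sketch of a conforming executor (hypothetical, not part of this commit; the header path is assumed):

```cpp
#include <daggy/executors/task/TaskExecutor.hpp>  // assumed install path

namespace daggy::executors::task {
    // Hypothetical executor that accepts any job and runs nothing.
    class NoOpTaskExecutor : public TaskExecutor {
    public:
        NoOpTaskExecutor(size_t nThreads) : TaskExecutor(nThreads) {}

        // A real executor would check for its required job keys here.
        bool validateTaskParameters(const ConfigValues &job) override { return true; }

        // Nothing to interpolate: every job expands to exactly itself.
        std::vector<ConfigValues>
        expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override {
            return {job};
        }

        // Report immediate success without executing anything.
        AttemptRecord execute(const Task &task) override {
            AttemptRecord rec;
            rec.startTime = Clock::now();
            rec.rc = 0;
            return rec;
        }
    };
}
```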
@@ -17,7 +17,7 @@ namespace daggy {
|
|||||||
class DAGRunLogger {
|
class DAGRunLogger {
|
||||||
public:
|
public:
|
||||||
// Execution
|
// Execution
|
||||||
virtual DAGRunID startDAGRun(std::string name, const TaskList &tasks) = 0;
|
virtual DAGRunID startDAGRun(std::string name, const TaskSet &tasks) = 0;
|
||||||
|
|
||||||
virtual void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0;
|
virtual void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ namespace daggy::loggers::dag_run {
     // Pretty heavy weight, but
     struct DAGRunRecord {
         std::string name;
-        TaskList tasks;
+        TaskSet tasks;
         std::unordered_map<std::string, RunState> taskRunStates;
         std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
         std::vector<TaskUpdateRecord> taskStateChanges;
@@ -37,7 +37,7 @@ namespace daggy::loggers::dag_run {
         FileSystemLogger(fs::path root);
 
         // Execution
-        DAGRunID startDAGRun(std::string name, const TaskList &tasks) override;
+        DAGRunID startDAGRun(std::string name, const TaskSet &tasks) override;
 
         void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
 
@@ -18,7 +18,7 @@ namespace daggy {
         OStreamLogger(std::ostream &os);
 
         // Execution
-        DAGRunID startDAGRun(std::string name, const TaskList &tasks) override;
+        DAGRunID startDAGRun(std::string name, const TaskSet &tasks) override;
 
         void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) override;
 
@@ -1,22 +1,30 @@
 #include <sstream>
 #include <iomanip>
 
+#include <rapidjson/error/en.h>
+
 #include <daggy/Serialization.hpp>
 #include <daggy/Utilities.hpp>
 
 namespace daggy {
-    ParameterValues parametersFromJSON(const std::string &jsonSpec) {
-        rj::Document doc;
-        rj::ParseResult parseResult = doc.Parse(jsonSpec.c_str());
-        if (!parseResult) {
-            throw std::runtime_error("Parameters spec is not valid JSON");
+    void checkRJParse(const rj::ParseResult &result, const std::string &prefix) {
+        if (!result) {
+            std::stringstream ss;
+            ss << (prefix.empty() ? "" : prefix + ':')
+               << "Error parsing JSON: " << rj::GetParseError_En(result.Code())
+               << " at byte offset " << result.Offset();
+            throw std::runtime_error(ss.str());
         }
-        return parametersFromJSON(doc);
     }
 
-    ParameterValues parametersFromJSON(const rj::Value &spec) {
-        std::unordered_map<std::string, ParameterValue> parameters;
+    ConfigValues configFromJSON(const std::string &jsonSpec) {
+        rj::Document doc;
+        checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config");
+        return configFromJSON(doc);
+    }
+
+    ConfigValues configFromJSON(const rj::Value &spec) {
+        std::unordered_map<std::string, ConfigValue> parameters;
         if (!spec.IsObject()) { throw std::runtime_error("Parameters in spec is not a JSON dictionary"); }
         for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
             if (!it->name.IsString()) {
@@ -44,86 +52,105 @@ namespace daggy {
         return parameters;
     }
 
-    TaskList
-    taskFromJSON(const std::string &name, const rj::Value &spec, const ParameterValues &parameters) {
-        TaskList tasks;
+    std::string configToJSON(const ConfigValues &config) {
+        std::stringstream ss;
+        ss << '{';
+        bool first = true;
+        for (const auto &[k, v]: config) {
+            if (first) { first = false; } else { ss << ", "; }
+            ss << std::quoted(k) << ": ";
+            if (std::holds_alternative<std::string>(v)) {
+                ss << std::quoted(std::get<std::string>(v));
+            } else {
+                ss << '[';
+                const auto &vals = std::get<std::vector<std::string>>(v);
+                bool firstVal = true;
+                for (const auto &val: vals) {
+                    if (firstVal) { firstVal = false; } else { ss << ", "; }
+                    ss << std::quoted(val);
+                }
+                ss << ']';
+            }
+        }
+        ss << '}';
+        return ss.str();
+    }
+
+    Task
+    taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults) {
+        Task task{
+                .definedName = name,
+                .isGenerator = false,
+                .maxRetries = 0,
+                .retryIntervalSeconds = 0,
+                .job = jobDefaults
+        };
         if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); }
 
-        if (!spec.HasMember("command")) {
-            throw std::runtime_error("Task " + name + " is missing required 'command' field");
-        }
-
         // Grab the standard fields with defaults;
-        bool isGenerator = false;
         if (spec.HasMember("isGenerator")) {
-            isGenerator = spec["isGenerator"].GetBool();
+            task.isGenerator = spec["isGenerator"].GetBool();
         }
 
-        uint8_t maxRetries = 0;
-        if (spec.HasMember("maxRetries")) { maxRetries = spec["maxRetries"].GetInt(); }
-        uint8_t retryIntervalSeconds = 0;
-        if (spec.HasMember(
-                "retryIntervalSeconds")) { retryIntervalSeconds = spec["retryIntervalSeconds"].GetInt(); }
+        if (spec.HasMember("maxRetries")) {
+            task.maxRetries = spec["maxRetries"].GetInt();
+        }
+
+        if (spec.HasMember("retryIntervalSeconds")) {
+            task.retryIntervalSeconds = spec["retryIntervalSeconds"].GetInt();
+        }
 
         // Children / parents
-        std::unordered_set<std::string> children;
         if (spec.HasMember("children")) {
             const auto &specChildren = spec["children"].GetArray();
             for (size_t c = 0; c < specChildren.Size(); ++c) {
-                children.insert(specChildren[c].GetString());
+                task.children.insert(specChildren[c].GetString());
             }
         }
 
-        std::unordered_set<std::string> parents;
         if (spec.HasMember("parents")) {
             const auto &specParents = spec["parents"].GetArray();
             for (size_t c = 0; c < specParents.Size(); ++c) {
-                parents.insert(specParents[c].GetString());
+                task.parents.insert(specParents[c].GetString());
             }
         }
 
-        // Build out the commands
-        std::vector<std::string> command;
-        for (size_t cmd = 0; cmd < spec["command"].Size(); ++cmd) {
-            command.emplace_back(spec["command"][cmd].GetString());
+        if (spec.HasMember("job")) {
+            const auto &params = spec["job"];
+            if (!params.IsObject()) throw std::runtime_error("job is not a dictionary.");
+            for (auto it = params.MemberBegin(); it != params.MemberEnd(); ++it) {
+                if (!it->name.IsString()) throw std::runtime_error("job key must be a string.");
+                if (it->value.IsArray()) {
+                    std::vector<std::string> values;
+                    for (size_t i = 0; i < it->value.Size(); ++i) {
+                        values.emplace_back(it->value[i].GetString());
+                    }
+                    task.job.insert_or_assign(it->name.GetString(), values);
+                } else {
+                    task.job.insert_or_assign(it->name.GetString(), it->value.GetString());
+                }
+            }
         }
-        auto commands = expandCommands(command, parameters);
 
-        // Create the tasks
-        for (size_t tid = 0; tid < commands.size(); ++tid) {
-            std::string taskName = (commands.size() == 1 ? name : name + "_" + std::to_string(tid));
-            tasks.emplace(taskName, Task{
-                    .definedName = name,
-                    .command = commands[tid],
-                    .maxRetries = maxRetries,
-                    .retryIntervalSeconds = retryIntervalSeconds,
-                    .children = children,
-                    .parents = parents,
-                    .isGenerator = isGenerator
-            });
-        }
-        return tasks;
+        return task;
     }
 
-    TaskList tasksFromJSON(const std::string &jsonSpec, const ParameterValues &parameters) {
+    TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults) {
         rj::Document doc;
-        rj::ParseResult parseResult = doc.Parse(jsonSpec.c_str());
-        if (!parseResult) {
-            throw std::runtime_error("Unable to parse spec: ");
-        }
-        return tasksFromJSON(doc, parameters);
+        checkRJParse(doc.Parse(jsonSpec.c_str()));
+        return tasksFromJSON(doc, jobDefaults);
     }
 
-    TaskList tasksFromJSON(const rj::Value &spec, const ParameterValues &parameters) {
-        TaskList tasks;
+    TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults) {
+        TaskSet tasks;
         if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); }
 
         // Tasks
         for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
             if (!it->name.IsString()) throw std::runtime_error("Task names must be a string.");
             if (!it->value.IsObject()) throw std::runtime_error("Task definitions must be an object.");
-            auto subTasks = taskFromJSON(it->name.GetString(), it->value, parameters);
-            tasks.merge(subTasks);
+            const auto &taskName = it->name.GetString();
+            tasks.emplace(taskName, taskFromJSON(taskName, it->value, jobDefaults));
         }
         return tasks;
     }
@@ -138,15 +165,7 @@ namespace daggy {
            << R"("maxRetries": )" << task.maxRetries << ','
            << R"("retryIntervalSeconds": )" << task.retryIntervalSeconds << ',';
 
-        // Commands
-        ss << R"("command": [)";
-        first = true;
-        for (const auto &part: task.command) {
-            if (!first) ss << ',';
-            ss << std::quoted(part);
-            first = false;
-        }
-        ss << "],";
+        ss << R"("job": )" << configToJSON(task.job) << ',';
 
         ss << R"("children": [)";
         first = true;
@@ -163,7 +182,7 @@ namespace daggy {
         return ss.str();
     }
 
-    std::string tasksToJSON(const TaskList &tasks) {
+    std::string tasksToJSON(const TaskSet &tasks) {
         std::stringstream ss;
 
         ss << "{";
@@ -85,7 +85,7 @@ namespace daggy {
     /*
      * {
      *    "name": "DAG Run Name"
-     *    "taskParameters": {...}
+     *    "parameters": {...}
      *    "tasks": {...}
      */
     void Server::handleRunDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
@@ -105,21 +105,33 @@ namespace daggy {
         std::string runName = doc["name"].GetString();
 
         // Get parameters if there are any
-        ParameterValues parameters;
-        if (doc.HasMember("taskParameters")) {
+        ConfigValues parameters;
+        if (doc.HasMember("parameters")) {
             try {
-                auto parsedParams = parametersFromJSON(doc["taskParameters"].GetObject());
+                auto parsedParams = configFromJSON(doc["parameters"].GetObject());
                 parameters.swap(parsedParams);
             } catch (std::exception &e) {
                 REQ_ERROR(Bad_Request, e.what());
             }
         }
 
+        // Job Defaults
+        ConfigValues jobDefaults;
+        if (doc.HasMember("jobDefaults")) {
+            try {
+                auto parsedJobDefaults = configFromJSON(doc["jobDefaults"].GetObject());
+                jobDefaults.swap(parsedJobDefaults);
+            } catch (std::exception &e) {
+                REQ_ERROR(Bad_Request, e.what());
+            }
+        }
+
         // Get the tasks
-        TaskList tasks;
+        TaskSet tasks;
         try {
-            auto parsedTasks = tasksFromJSON(doc["tasks"], parameters);
-            tasks.swap(parsedTasks);
+            auto taskTemplates = tasksFromJSON(doc["tasks"], jobDefaults);
+            auto expandedTasks = expandTaskSet(taskTemplates, executor_, parameters);
+            tasks.swap(expandedTasks);
         } catch (std::exception &e) {
             REQ_ERROR(Bad_Request, e.what());
         }
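Pieced together from the handler above, a submission body now carries `parameters`, `jobDefaults`, and per-task `job` members. A sketch (the `memory` key is an illustrative executor-specific value):

```json
{
  "name": "example-run",
  "parameters": {
    "DATE": ["2021-05-06", "2021-05-07"]
  },
  "jobDefaults": {
    "memory": "300M"
  },
  "tasks": {
    "echo_date": {
      "job": {
        "command": ["/usr/bin/echo", "{{DATE}}"]
      }
    }
  }
}
```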
@@ -14,14 +14,14 @@ namespace daggy {
     }
 
     std::vector<std::vector<std::string>>
-    expandCommands(const std::vector<std::string> &command, const ParameterValues &parameters) {
-        std::vector<std::vector<std::string>> commands{{}};
+    interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values) {
+        std::vector<std::vector<std::string>> cooked{{}};
 
-        for (const auto &part: command) {
+        for (const auto &part: raw) {
             std::vector<std::string> expandedPart{part};
 
             // Find all values of parameters, and expand them
-            for (const auto &[paramRaw, paramValue]: parameters) {
+            for (const auto &[paramRaw, paramValue]: values) {
                 std::string param = "{{" + paramRaw + "}}";
                 auto pos = part.find(param);
                 if (pos == std::string::npos) continue;
@@ -44,17 +44,42 @@ namespace daggy {
 
             std::vector<std::vector<std::string>> newCommands;
             for (const auto &newPart: expandedPart) {
-                for (auto cmd: commands) {
+                for (auto cmd: cooked) {
                     cmd.push_back(newPart);
                     newCommands.emplace_back(cmd);
                 }
             }
-            commands.swap(newCommands);
+            cooked.swap(newCommands);
         }
-        return commands;
+        return cooked;
     }
 
-    void updateDAGFromTasks(TaskDAG &dag, TaskList &tasks) {
+    TaskSet
+    expandTaskSet(const TaskSet &tasks,
+                  executors::task::TaskExecutor &executor,
+                  const ConfigValues &interpolatedValues) {
+        // Expand the tasks first
+        TaskSet newTaskSet;
+        for (const auto &[baseName, task]: tasks) {
+            executor.validateTaskParameters(task.job);
+            const auto newJobs = executor.expandTaskParameters(task.job, interpolatedValues);
+            if (newJobs.size() == 1) {
+                newTaskSet.emplace(baseName, task);
+            } else {
+                size_t i = 0;
+                for (const auto &newJob: newJobs) {
+                    Task newTask{task};
+                    newTask.job = newJob;
+                    newTaskSet.emplace(baseName + "_" + std::to_string(i), newTask);
+                    ++i;
+                }
+            }
+        }
+        return newTaskSet;
+    }
+
+    void updateDAGFromTasks(TaskDAG &dag, TaskSet &tasks) {
         // Add all the vertices
         std::unordered_map<std::string, std::unordered_set<std::string>> definedSets;
         for (const auto &[name, task]: tasks) {
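A short usage sketch of the new expansion path, mirroring the unit tests added later in this commit (values are illustrative): a job referencing `{{DATE}}` with two values is duplicated, and the copies are named `B_0` and `B_1` after the original task `B`:

```cpp
daggy::executors::task::ForkingTaskExecutor ex(4);

auto params = daggy::configFromJSON(R"({"DATE": ["2021-05-06", "2021-05-07"]})");
auto tasks  = daggy::tasksFromJSON(
        R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}}})");

auto expanded = daggy::expandTaskSet(tasks, ex, params);  // expanded.size() == 2
```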
@@ -79,10 +104,9 @@ namespace daggy {
            }
         }
     }
 
-    TaskDAG buildDAGFromTasks(TaskList &tasks,
+    TaskDAG buildDAGFromTasks(TaskSet &tasks,
                               const std::vector<loggers::dag_run::TaskUpdateRecord> &updates) {
         TaskDAG dag;
 
         updateDAGFromTasks(dag, tasks);
         dag.reset();
@@ -111,7 +135,7 @@ namespace daggy {
         logger.updateTaskState(runID, taskName, RunState::RUNNING);
 
         while (attempts.size() < task.maxRetries + 1) {
-            attempts.push_back(executor.runCommand(task));
+            attempts.push_back(executor.execute(task));
             logger.logTaskAttempt(runID, taskName, attempts.back());
             if (attempts.back().rc == 0) break;
             logger.updateTaskState(runID, taskName, RunState::RETRY);
@@ -123,7 +147,7 @@ namespace daggy {
                 executors::task::TaskExecutor &executor,
                 loggers::dag_run::DAGRunLogger &logger,
                 TaskDAG dag,
-                const ParameterValues taskParameters
+                const ConfigValues parameters
     ) {
         logger.updateDAGRunState(runID, RunState::RUNNING);
 
@@ -146,9 +170,11 @@ namespace daggy {
             auto &task = vert.data;
             if (task.isGenerator) {
                 // Parse the output and update the DAGs
-                // TODO: Let the logger know about the new tasks
                 try {
-                    auto newTasks = tasksFromJSON(attemptRecords.back().outputLog, taskParameters);
+                    auto newTasks = expandTaskSet(tasksFromJSON(attemptRecords.back().outputLog),
+                                                  executor,
+                                                  parameters
+                    );
                     updateDAGFromTasks(dag, newTasks);
 
                     for (const auto &[ntName, ntTask]: newTasks) {
@@ -158,6 +184,10 @@ namespace daggy {
                     }
                     logger.updateTask(runID, taskName, task);
                 } catch (std::exception &e) {
+                    logger.logTaskAttempt(runID, taskName,
+                                          AttemptRecord{.executorLog =
+                                                  std::string{"Failed to parse JSON output: "} +
+                                                  e.what()});
                     logger.updateTaskState(runID, taskName, RunState::ERRORED);
                     ++errored;
                 }
@@ -1,4 +1,5 @@
 #include <daggy/executors/task/ForkingTaskExecutor.hpp>
+#include <daggy/Utilities.hpp>
 
 #include <fcntl.h>
 #include <unistd.h>
@@ -31,23 +32,26 @@ std::string slurp(int fd) {
 }
 
 daggy::AttemptRecord
-ForkingTaskExecutor::runCommand(const Task &task) {
+ForkingTaskExecutor::execute(const Task &task) {
     AttemptRecord rec;
 
     rec.startTime = Clock::now();
 
     // Need to convert the strings
     std::vector<char *> argv;
-    for (const auto &s : task.command) {
+    const auto command = std::get<Command>(task.job.at("command"));
+    for (const auto &s: command) {
         argv.push_back(const_cast<char *>(s.c_str()));
     }
     argv.push_back(nullptr);
 
     // Create the pipe
     int stdoutPipe[2];
-    pipe2(stdoutPipe, O_DIRECT);
+    int pipeRC = pipe2(stdoutPipe, O_DIRECT);
+    if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stdout");
     int stderrPipe[2];
-    pipe2(stderrPipe, O_DIRECT);
+    pipeRC = pipe2(stderrPipe, O_DIRECT);
+    if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stderr");
 
     pid_t child = fork();
     if (child < 0) {
@@ -84,3 +88,26 @@ ForkingTaskExecutor::runCommand(const Task &task) {
 
     return rec;
 }
+
+bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job) {
+    auto it = job.find("command");
+    if (it == job.end())
+        throw std::runtime_error(R"(job does not have a "command" argument)");
+    if (!std::holds_alternative<Command>(it->second))
+        throw std::runtime_error(R"(the job's "command" must be an array of strings)");
+    return true;
+}
+
+std::vector<daggy::ConfigValues>
+ForkingTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) {
+    std::vector<ConfigValues> newValues;
+
+    const auto command = std::get<Command>(job.at("command"));
+    for (const auto &expandedCommand: interpolateValues(command, expansionValues)) {
+        ConfigValues newCommand{job};
+        newCommand.at("command") = expandedCommand;
+        newValues.emplace_back(newCommand);
+    }
+
+    return newValues;
+}
@@ -39,7 +39,7 @@ namespace daggy {
     }
 
     // Execution
-    DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskList &tasks) {
+    DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskSet &tasks) {
         DAGRunID runID = nextRunID_++;
 
         // TODO make this threadsafe
@@ -4,6 +4,7 @@
 #include <enum.h>
 
 #include <daggy/loggers/dag_run/OStreamLogger.hpp>
+#include <daggy/Serialization.hpp>
 
 namespace daggy {
     namespace loggers {
@@ -11,7 +12,7 @@ namespace daggy {
         OStreamLogger::OStreamLogger(std::ostream &os) : os_(os) {}
 
         // Execution
-        DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskList &tasks) {
+        DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskSet &tasks) {
             std::lock_guard<std::mutex> lock(guard_);
             size_t runID = dagRuns_.size();
             dagRuns_.push_back({
@@ -26,9 +27,7 @@ namespace daggy {
             os_ << "Starting new DAGRun named " << name << " with ID " << runID << " and " << tasks.size()
                 << " tasks" << std::endl;
             for (const auto &[name, task]: tasks) {
-                os_ << "TASK (" << name << "): ";
-                std::copy(task.command.begin(), task.command.end(),
-                          std::ostream_iterator<std::string>(os_, " "));
+                os_ << "TASK (" << name << "): " << configToJSON(task.job);
                 os_ << std::endl;
             }
             return runID;
@@ -12,13 +12,13 @@ namespace fs = std::filesystem;
 using namespace daggy;
 using namespace daggy::loggers::dag_run;
 
-const TaskList SAMPLE_TASKS{
-        {"work_a", Task{.command{"/bin/echo", "a"}, .children{"c"}}},
-        {"work_b", Task{.command{"/bin/echo", "b"}, .children{"c"}}},
-        {"work_c", Task{.command{"/bin/echo", "c"}}}
+const TaskSet SAMPLE_TASKS{
+        {"work_a", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}}, .children{"c"}}},
+        {"work_b", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}}, .children{"c"}}},
+        {"work_c", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}
 };
 
-inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskList &tasks) {
+inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskSet &tasks) {
     auto runID = logger.startDAGRun(name, tasks);
     auto dagRun = logger.getDAGRun(runID);
 
@@ -2,6 +2,8 @@
 #include <filesystem>
 
 #include "daggy/executors/task/ForkingTaskExecutor.hpp"
+#include "daggy/Serialization.hpp"
+#include "daggy/Utilities.hpp"
 
 #include <catch2/catch.hpp>
 
@@ -9,9 +11,12 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
     daggy::executors::task::ForkingTaskExecutor ex(10);
 
     SECTION("Simple Run") {
-        daggy::Task task{.command{"/usr/bin/echo", "abc", "123"}};
+        daggy::Task task{.job{
+                {"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/echo", "abc", "123"}}}};
 
-        auto rec = ex.runCommand(task);
+        REQUIRE(ex.validateTaskParameters(task.job));
+
+        auto rec = ex.execute(task);
 
         REQUIRE(rec.rc == 0);
         REQUIRE(rec.outputLog.size() >= 6);
@@ -19,9 +24,10 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
     }
 
     SECTION("Error Run") {
-        daggy::Task task{.command{"/usr/bin/expr", "1", "+", "+"}};
+        daggy::Task task{.job{
+                {"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}};
 
-        auto rec = ex.runCommand(task);
+        auto rec = ex.execute(task);
 
         REQUIRE(rec.rc == 2);
         REQUIRE(rec.errorLog.size() >= 20);
@@ -33,16 +39,45 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
                 "/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli"
         };
 
-        for (const auto &bigFile : BIG_FILES) {
+        for (const auto &bigFile: BIG_FILES) {
             if (!std::filesystem::exists(bigFile)) continue;
 
-            daggy::Task task{.command{"/usr/bin/cat", bigFile}};
+            daggy::Task task{.job{
+                    {"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/cat", bigFile}}}};
 
-            auto rec = ex.runCommand(task);
+            auto rec = ex.execute(task);
 
             REQUIRE(rec.rc == 0);
             REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
             REQUIRE(rec.errorLog.empty());
         }
     }
+
+    SECTION("Parameter Expansion") {
+        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
+        auto params = daggy::configFromJSON(testParams);
+
+        std::string taskJSON = R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
+        auto tasks = daggy::tasksFromJSON(taskJSON);
+
+        auto result = daggy::expandTaskSet(tasks, ex, params);
+        REQUIRE(result.size() == 2);
+    }
+
+    SECTION("Build with expansion") {
+        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
+        auto params = daggy::configFromJSON(testParams);
+        std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
+        auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
+        REQUIRE(tasks.size() == 4);
+    }
+
+    SECTION("Build with expansion using parents instead of children") {
+        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
+        auto params = daggy::configFromJSON(testParams);
+        std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
+        auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
+
+        REQUIRE(tasks.size() == 4);
+    }
 }
@@ -11,49 +11,68 @@ namespace fs = std::filesystem;
 TEST_CASE("Deserialize Parameters", "[deserialize_parameters]") {
     SECTION("Basic Parse") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
-        auto params = daggy::parametersFromJSON(testParams);
+        auto params = daggy::configFromJSON(testParams);
         REQUIRE(params.size() == 2);
         REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
         REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
     }SECTION("Invalid JSON") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
-        REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
+        REQUIRE_THROWS(daggy::configFromJSON(testParams));
     }SECTION("Non-string Keys") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
-        REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
+        REQUIRE_THROWS(daggy::configFromJSON(testParams));
     }SECTION("Non-array/Non-string values") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
-        REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
+        REQUIRE_THROWS(daggy::configFromJSON(testParams));
     }
 }
 
 TEST_CASE("Task Deserialization", "[deserialize_task]") {
     SECTION("Build with no expansion") {
-        std::string testTasks = R"({ "A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
+        std::string testTasks = R"({
+            "A": {
+                "job": { "command": ["/bin/echo", "A"] },
+                "children": ["C"]
+            },
+            "B": {
+                "job": {"command": ["/bin/echo", "B"]},
+                "children": ["C"]
+            },
+            "C": {
+                "job": {"command": ["/bin/echo", "C"]}
+            }
+        })";
         auto tasks = daggy::tasksFromJSON(testTasks);
         REQUIRE(tasks.size() == 3);
     }
 
-    SECTION("Build with expansion") {
-        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
-        auto params = daggy::parametersFromJSON(testParams);
-        std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["B"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
-        auto tasks = daggy::tasksFromJSON(testTasks, params);
-        REQUIRE(tasks.size() == 4);
-    }
-
-    SECTION("Build with expansion using parents instead of children") {
-        std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
-        auto params = daggy::parametersFromJSON(testParams);
-        std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]}, "C": {"command": ["/bin/echo", "C"], "parents": ["A"]}})";
-        auto tasks = daggy::tasksFromJSON(testTasks, params);
-        REQUIRE(tasks.size() == 4);
+    SECTION("Build with job defaults") {
+        std::string testTasks = R"({
+            "A": {
+                "job": { "command": ["/bin/echo", "A"] },
+                "children": ["B"]
+            },
+            "B": {
+                "job": {
+                    "command": ["/bin/echo", "C"],
+                    "memory": "1G"
+                }
+            }
+        })";
+        daggy::ConfigValues jobDefaults{{"runtime", "60"},
+                                        {"memory", "300M"}};
+        auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
+        REQUIRE(tasks.size() == 2);
+        REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
+        REQUIRE(std::get<std::string>(tasks["A"].job["memory"]) == "300M");
+        REQUIRE(std::get<std::string>(tasks["B"].job["runtime"]) == "60");
+        REQUIRE(std::get<std::string>(tasks["B"].job["memory"]) == "1G");
     }
 }
 
 TEST_CASE("Task Serialization", "[serialize_tasks]") {
     SECTION("Build with no expansion") {
-        std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
+        std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
         auto tasks = daggy::tasksFromJSON(testTasks);
 
         auto genJSON = daggy::tasksToJSON(tasks);
|
|||||||
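The job-defaults assertions imply a simple merge rule: each default key is copied into a task's `job` only when the task does not already define it, which is why `B` keeps its own `"memory": "1G"` while inheriting `"runtime": "60"`. A sketch of that rule, approximating `ConfigValues` as a plain string map (the real type holds string-or-vector variants):

```cpp
// Sketch of the defaults-merge rule the assertions imply: a default is copied
// into a task's job only when the task does not already set that key.
#include <cassert>
#include <map>
#include <string>

using ConfigValues = std::map<std::string, std::string>;  // simplified stand-in

ConfigValues mergeDefaults(ConfigValues job, const ConfigValues& defaults) {
    for (const auto& [key, value] : defaults)
        job.emplace(key, value);  // emplace is a no-op if the key already exists
    return job;
}

int main() {
    ConfigValues defaults{{"runtime", "60"}, {"memory", "300M"}};
    ConfigValues jobB{{"memory", "1G"}};

    auto merged = mergeDefaults(jobB, defaults);
    assert(merged["runtime"] == "60");  // inherited from defaults
    assert(merged["memory"] == "1G");   // task-level value wins
}
```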
@@ -6,9 +6,10 @@

 #include <pistache/client.h>
 #include <rapidjson/document.h>

-#include "daggy/Server.hpp"
-#include "daggy/executors/task/ForkingTaskExecutor.hpp"
-#include "daggy/loggers/dag_run/OStreamLogger.hpp"
+#include <daggy/Server.hpp>
+#include <daggy/Serialization.hpp>
+#include <daggy/executors/task/ForkingTaskExecutor.hpp>
+#include <daggy/loggers/dag_run/OStreamLogger.hpp>

 namespace rj = rapidjson;

@@ -73,10 +74,10 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {

     SECTION("Simple DAGRun Submission") {
         std::string dagRun = R"({
             "name": "unit_server",
-            "taskParameters": { "FILE": [ "A", "B" ] },
+            "parameters": { "FILE": [ "A", "B" ] },
             "tasks": {
-                "touch": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ] },
-                "cat": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
+                "touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
+                "cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
                     "parents": [ "touch" ]
                 }
             }
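The submission payload now nests each command under `job` and uses `parameters` in place of `taskParameters`. A quick, illustrative rapidjson check of the reshaped payload (rapidjson is already a dependency of these tests):

```cpp
// Illustrative only: verify the reshaped payload parses and exposes the new
// members, using rapidjson as the tests already do.
#include <cassert>
#include <rapidjson/document.h>

int main() {
    const char* dagRun = R"({
        "name": "unit_server",
        "parameters": { "FILE": [ "A", "B" ] },
        "tasks": {
            "touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ] } }
        }
    })";

    rapidjson::Document doc;
    doc.Parse(dagRun);
    assert(!doc.HasParseError());
    assert(doc.HasMember("parameters"));                       // renamed from taskParameters
    assert(doc["tasks"]["touch"]["job"].HasMember("command")); // command now lives under job
}
```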
@@ -90,8 +91,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {

         REQUIRE(response.code() == Pistache::Http::Code::Ok);

         rj::Document doc;
-        rj::ParseResult parseResult = doc.Parse(response.body().c_str());
-        REQUIRE(parseResult);
+        daggy::checkRJParse(doc.Parse(response.body().c_str()));
         REQUIRE(doc.IsObject());
         REQUIRE(doc.HasMember("runID"));

@@ -104,8 +104,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {

         REQUIRE(response.code() == Pistache::Http::Code::Ok);

         rj::Document doc;
-        rj::ParseResult parseResult = doc.Parse(response.body().c_str());
-        REQUIRE(parseResult);
+        daggy::checkRJParse(doc.Parse(response.body().c_str()));
         REQUIRE(doc.IsArray());
         REQUIRE(doc.Size() >= 1);

@@ -134,8 +133,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {

         auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
         REQUIRE(response.code() == Pistache::Http::Code::Ok);
         rj::Document doc;
-        rj::ParseResult parseResult = doc.Parse(response.body().c_str());
-        REQUIRE(parseResult);
+        daggy::checkRJParse(doc.Parse(response.body().c_str()));
         REQUIRE(doc.IsObject());

         REQUIRE(doc.HasMember("taskStates"));
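`daggy::checkRJParse` collapses the old parse-then-`REQUIRE` boilerplate into one call. Its definition is not part of this diff; a plausible sketch, assuming it throws on a failed `rapidjson::ParseResult`:

```cpp
// Hypothetical sketch of a checkRJParse helper; the real definition lives in
// daggy/Serialization.hpp and is not shown in this diff.
#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <stdexcept>
#include <string>

namespace daggy {
void checkRJParse(const rapidjson::ParseResult& result) {
    if (!result)  // ParseResult converts to false on failure
        throw std::runtime_error(
            std::string{"JSON parse error: "} +
            rapidjson::GetParseError_En(result.Code()) +
            " at offset " + std::to_string(result.Offset()));
}
}  // namespace daggy
```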
@@ -24,18 +24,18 @@ TEST_CASE("String Utilities", "[utilities_string]") {

 TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
     SECTION("Basic expansion") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
-        auto params = daggy::parametersFromJSON(testParams);
+        auto params = daggy::configFromJSON(testParams);
         std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"};
-        auto allCommands = daggy::expandCommands(cmd, params);
+        auto allCommands = daggy::interpolateValues(cmd, params);

         REQUIRE(allCommands.size() == 6);
     }

     SECTION("Skip over unused parameters") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
-        auto params = daggy::parametersFromJSON(testParams);
+        auto params = daggy::configFromJSON(testParams);
         std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
-        auto allCommands = daggy::expandCommands(cmd, params);
+        auto allCommands = daggy::interpolateValues(cmd, params);

         // TYPE isn't used, so it's just |DATE| * |SOURCE|
         REQUIRE(allCommands.size() == 2);

@@ -44,9 +44,9 @@ TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {

     SECTION("Expand within a command part") {
         std::string testParams{
             R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
-        auto params = daggy::parametersFromJSON(testParams);
+        auto params = daggy::configFromJSON(testParams);
         std::vector<std::string> cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"};
-        auto result = daggy::expandCommands(cmd, params);
+        auto result = daggy::interpolateValues(cmd, params);

         // TYPE isn't used, so it's just |DATE| * |SOURCE|
         REQUIRE(result.size() == 4);
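The renamed `interpolateValues` keeps the counting behavior of the old `expandCommands`: the number of generated commands is the product of the cardinalities of the parameters the command actually references. So 2 dates * 1 source * 3 types = 6; dropping `{{TYPE}}` leaves 2; and `{{DATE}}_{{SOURCE}}` with two values each gives 4. A standalone sketch of that arithmetic (illustrative, not daggy's implementation):

```cpp
// Sketch of the cardinality arithmetic behind the REQUIREs above: only
// parameters that actually appear in the command contribute to the product.
#include <iostream>
#include <map>
#include <string>
#include <vector>

size_t expansionCount(const std::vector<std::string>& cmd,
                      const std::map<std::string, std::vector<std::string>>& params) {
    size_t count = 1;
    for (const auto& [name, values] : params) {
        const std::string token = "{{" + name + "}}";
        bool referenced = false;
        for (const auto& part : cmd)
            if (part.find(token) != std::string::npos) referenced = true;
        if (referenced) count *= values.size();
    }
    return count;
}

int main() {
    std::map<std::string, std::vector<std::string>> params{
        {"DATE", {"2021-05-06", "2021-05-07"}},
        {"SOURCE", {"name"}},
        {"TYPE", {"a", "b", "c"}}};

    // Prints "6 2": all three parameters referenced vs. TYPE skipped.
    std::cout << expansionCount({"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"}, params)
              << " "
              << expansionCount({"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"}, params) << "\n";
}
```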
@@ -62,11 +62,11 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {

     SECTION("Simple execution") {
         std::string prefix = "asdlk_";
-        std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
-            + prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
-            + prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
-            + prefix + R"(C"]}})";
-        auto tasks = daggy::tasksFromJSON(taskJSON);
+        std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+            + prefix + R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+            + prefix + R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+            + prefix + R"(C"]}}})";
+        auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
         auto dag = daggy::buildDAGFromTasks(tasks);

         auto runID = logger.startDAGRun("test_run", tasks);

@@ -102,12 +102,13 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {

         std::string goodPrefix = "rec_error_";
         std::string badPrefix = "noexist/rec_error_";
-        std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
-            + goodPrefix +
-            R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
-            + badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
-            + badPrefix + R"(C"]}})";
-        auto tasks = daggy::tasksFromJSON(taskJSON);
+        std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+            + goodPrefix +
+            R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+            + badPrefix +
+            R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+            + badPrefix + R"(C"]}}})";
+        auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
         auto dag = daggy::buildDAGFromTasks(tasks);

         auto runID = logger.startDAGRun("test_run", tasks);
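Both runner tests now route parsed tasks through `expandTaskSet` before `buildDAGFromTasks` and `runDAG`, so execution always sees fully expanded tasks. A toy stand-in for the run loop itself, showing the ordering guarantee the tests rely on (a task runs only after all its parents; this is not daggy's `runDAG`):

```cpp
// Toy illustration of the run pipeline's ordering: tasks execute only after
// every parent has finished, mirroring the A/B -> C layout used above.
#include <iostream>
#include <map>
#include <set>
#include <string>

int main() {
    std::map<std::string, std::set<std::string>> parents{
        {"A", {}}, {"B", {}}, {"C", {"A", "B"}}};

    std::set<std::string> done;
    while (done.size() < parents.size()) {
        for (const auto& [task, deps] : parents) {
            if (done.count(task)) continue;
            bool ready = true;
            for (const auto& dep : deps)
                if (!done.count(dep)) ready = false;
            if (ready) {
                std::cout << "run " << task << "\n";  // e.g. /usr/bin/touch ...
                done.insert(task);
            }
        }
    }
}
```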
@@ -134,22 +135,31 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {

     SECTION("Generator tasks") {
         std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
-        auto params = daggy::parametersFromJSON(testParams);
+        auto params = daggy::configFromJSON(testParams);

-        std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
+        std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
+        fs::path ofn = fs::current_path() / "generator_test_output.json";
+        std::ofstream ofh{ofn};
+        ofh << generatorOutput << std::endl;
+        ofh.close();

         std::stringstream jsonTasks;

-        jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
-            << R"(], "children": ["C"], "isGenerator": true},)"
-            << R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })";
+        jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string())
+            << R"(]}, "children": ["C"], "isGenerator": true},)"
+            << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";

-        auto tasks = daggy::tasksFromJSON(jsonTasks.str());
+        auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
+        REQUIRE(baseTasks.size() == 2);
+        auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
+        REQUIRE(tasks.size() == 2);
         auto dag = daggy::buildDAGFromTasks(tasks);
         REQUIRE(dag.size() == 2);

         auto runID = logger.startDAGRun("generator_run", tasks);
         auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);

+        REQUIRE(finalDAG.allVisited());
         REQUIRE(finalDAG.size() == 4);

         // Check the logger
@@ -157,16 +167,15 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {

         REQUIRE(record.tasks.size() == 4);
         REQUIRE(record.taskRunStates.size() == 4);
-        for (const auto & [taskName, attempts] : record.taskAttempts) {
+        for (const auto &[taskName, attempts]: record.taskAttempts) {
             REQUIRE(attempts.size() == 1);
             REQUIRE(attempts.back().rc == 0);
         }

         // Ensure that children were updated properly
         REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
         REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
         REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
         REQUIRE(record.tasks["C"].children.empty());
     }
 }
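The reworked generator test writes the generator's task-set JSON to a file and has task `A` `cat` it, so the generator's stdout is parsed as new tasks at run time: `B` expands over the two `DATE` values into `B_0` and `B_1`, `A`'s children are rewired accordingly, and the DAG grows from 2 to 4 nodes, which is what the logger assertions above verify. A sketch of the fixture (paths illustrative):

```cpp
// Sketch of the generator fixture: the generator task's stdout is itself a
// task-set definition, which the runner parses and splices into the DAG.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

int main() {
    const std::string generatorOutput =
        R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";

    fs::path ofn = fs::current_path() / "generator_test_output.json";
    std::ofstream{ofn} << generatorOutput << std::endl;

    // Task A runs `/usr/bin/cat generator_test_output.json`; its stdout is the
    // JSON above. With DATE = [2021-05-06, 2021-05-07], B expands to B_0 and
    // B_1, both children of A and parents of C -> the final DAG has 4 nodes.
    std::cout << "wrote " << ofn << "\n";
}
```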