From 1137b275df4f65aae416009ff7d6d552844162c1 Mon Sep 17 00:00:00 2001 From: Jean-Robin Date: Wed, 29 May 2024 15:24:47 +0200 Subject: [PATCH] #709 userman update (#1333) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve Ref man for entities adding usage examples. Helps refactoring user man. * Update taipy/core/cycle/cycle.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/cycle/cycle.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/cycle/cycle.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/job/job.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/job/job.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/submission/submission.py Co-authored-by: Đỗ Trường Giang * Update taipy/core/cycle/cycle.py --------- Co-authored-by: Đỗ Trường Giang --- taipy/config/common/scope.py | 37 +++++++------- taipy/core/cycle/cycle.py | 61 +++++++++++++++++++++++ taipy/core/data/data_node.py | 2 + taipy/core/job/job.py | 19 ++++++-- taipy/core/sequence/sequence.py | 75 ++++++++++++++++++++++++++++- taipy/core/submission/submission.py | 37 +++++++++++++- taipy/core/task/task.py | 7 ++- 7 files changed, 211 insertions(+), 27 deletions(-) diff --git a/taipy/config/common/scope.py b/taipy/config/common/scope.py index 5dcdccf2f0..8c75783595 100644 --- a/taipy/config/common/scope.py +++ b/taipy/config/common/scope.py @@ -43,33 +43,36 @@ class Scope(_OrderedEnum): - `CYCLE` - `SCENARIO` (Default value) - Each data node config has a scope. It is an attribute propagated to the `DataNode^` when instantiated from - a `DataNodeConfig^`. The scope is used to determine the _visibility_ of the data node, and which scenarios can - access it. + Each data node config has a scope. It is an attribute propagated to the `DataNode^` + when instantiated from a `DataNodeConfig^`. The scope is used to determine the + _visibility_ of the data node, and which scenarios can access it. 
In other words : - - There can be only one data node instantiated from a `DataNodeConfig^` with a `GLOBAL` scope. All the - scenarios share the unique data node. When a new scenario is created, the data node is also created if - and only if it does not exist yet. - - Only one data node instantiated from a `DataNodeConfig^` with a `CYCLE` scope is created for each cycle. - All the scenarios of the same cycle share the same data node. When a new scenario is created within a - cycle, Taipy instantiates a new data node if and only if there is no data node for the cycle yet. - - A data node that has the scope set to `SCENARIO` belongs to a unique scenario and cannot be used by others - When creating a new scenario, data nodes with a `SCENARIO` scope are systematically created along with - the new scenario. + - There can be only one data node instantiated from a `DataNodeConfig^` with a `GLOBAL` + scope. All the scenarios share the unique data node. When a new scenario is created, + the data node is also created if and only if it does not exist yet. + - Only one data node instantiated from a `DataNodeConfig^` with a `CYCLE` scope is + created for each cycle. All the scenarios of the same cycle share the same data node. + When a new scenario is created within a cycle, Taipy instantiates a new data node if + and only if there is no data node for the cycle yet. + - A data node that has the scope set to `SCENARIO` belongs to a unique scenario and cannot + be used by others. When creating a new scenario, data nodes with a `SCENARIO` scope + are systematically created along with the new scenario. !!! example - Let's consider a simple example where a company wants to predict its sales for the next month. The company - has a trained model that predicts the sales based on the current month and the historical sales. Based on - the sales forecasts the company wants to plan its production orders. 
The company wants to simulate two - scenarios every month: one with low capacity and one with high capacity. + Let's consider a simple example where a company wants to predict its sales for the next + month. The company has a trained model that predicts the sales based on the current month + and the historical sales. Based on the sales forecasts the company wants to plan its + production orders. The company wants to simulate two scenarios every month: one with + low capacity and one with high capacity. We can create the `DataNodeConfig^`s with the following scopes: - One data node for the historical sales with a `GLOBAL` scope. - - Three data nodes with a `CYCLE` scope, for the trained model, the current month, and the sales predictions. + - Three data nodes with a `CYCLE` scope, for the trained model, the current month, + and the sales predictions. - Two data nodes with a `SCENARIO` scope, for the capacity and the production orders. The code snippet below shows how to configure the data nodes with the different scopes: diff --git a/taipy/core/cycle/cycle.py b/taipy/core/cycle/cycle.py index 16647bd079..89c3f7b1d8 100644 --- a/taipy/core/cycle/cycle.py +++ b/taipy/core/cycle/cycle.py @@ -28,6 +28,31 @@ class Cycle(_Entity, _Labeled): """An iteration of a recurrent work pattern. + Many business operations are periodic, such as weekly predictions of sales data, monthly + master planning of supply chains, quarterly financial reports, yearly budgeting, etc. + The data applications to solve these business problems often require modeling the + corresponding periods (i.e., cycles). + + For this purpose, a `Cycle^` represents a single iteration of such a time pattern. + Each _cycle_ has a start date and a duration. Examples of cycles are: + + - Monday, 2. January 2023 as a daily cycle + - Week 01 2023, from 2. January as a weekly cycle + - January 2023 as a monthly cycle + - etc. + + `Cycle^`s are created along with the `Scenario^`s that are attached to them. 
+ At its creation, a new scenario is attached to a single cycle, the one that + matches its optional _frequency_ and its _creation_date_. + + The possible frequencies are: + + - `Frequency.DAILY` + - `Frequency.WEEKLY` + - `Frequency.MONTHLY` + - `Frequency.QUARTERLY` + - `Frequency.YEARLY` + Attributes: id (str): The unique identifier of the cycle. frequency (Frequency^): The frequency of this cycle. @@ -36,6 +61,42 @@ class Cycle(_Entity, _Labeled): end_date (datetime): The date and time of the end of this cycle. name (str): The name of this cycle. properties (dict[str, Any]): A dictionary of additional properties. + + !!! example "Example for January cycle" + + ![cycles](../refmans/img/cycles_january_colored.svg){ align=left width="250" } + + Let's assume an end-user publishes production orders (i.e., a production plan) every + month. During each month (the cycle), he/she will be interested in experimenting with + different scenarios until only one of those scenarios is selected as the official + production plan to be published. Each month is modeled as a cycle, and each cycle + can contain one or more scenarios. + + The picture on the left shows the tree of entities: Cycles, Scenarios, and their + associated Sequence(s). There is an existing past cycle for December and a current + cycle for January containing a single scenario. + + At the end of a _cycle_ (start date + duration), only one of the scenarios is + applied in production. This "official" scenario is called the _**primary scenario**_. + Only one _**primary scenario**_ per cycle is allowed. + + !!! example "Example for February cycle" + + ![cycles](../pic/cycles_colored.svg){ align=left width="250" } + Now the user starts working on the February work cycle. He or she creates two + scenarios for the February cycle (one with a low capacity assumption and one with + a high capacity assumption). The user can then decide to elect the low capacity + scenario as the "official" scenario for February. 
To accomplish that, the user just + needs to promote the low capacity scenario as _**primary**_ for the February cycle. + + The tree of entities resulting from the various scenarios created is represented + in the picture on the left. The underlined scenarios are _**primary**_. + + !!! note + + For a scenario, cycles are optional. If a scenario has no Frequency, it will not be + attached to any cycle. + """ _ID_PREFIX = "CYCLE" diff --git a/taipy/core/data/data_node.py b/taipy/core/data/data_node.py index 788c76e0f6..9e11b51b2d 100644 --- a/taipy/core/data/data_node.py +++ b/taipy/core/data/data_node.py @@ -75,6 +75,8 @@ class DataNode(_Entity, _Labeled): and use the `create_global_data_node()^` function as illustrated in the following example. + A data node's attributes are populated based on its configuration `DataNodeConfig^`. + !!! Example ```python diff --git a/taipy/core/job/job.py b/taipy/core/job/job.py index 52aecc36c0..fa23ac5412 100644 --- a/taipy/core/job/job.py +++ b/taipy/core/job/job.py @@ -42,8 +42,16 @@ def __run_callbacks(job): class Job(_Entity, _Labeled): """Execution of a `Task^`. - A job handles the status of the execution, contains the stacktrace of exceptions that were - raised during the execution, and notifies subscribers on status change. + Task, Sequence, and Scenario entities can be submitted for execution. The submission + of a scenario triggers the submission of all the contained tasks. Similarly, the submission + of a sequence also triggers the execution of all the ordered tasks. + + Every time a task is submitted for execution, a new *Job* is created. A job represents a + single execution of a task. It holds all the information related to the task execution, + including the **creation date**, the execution `Status^`, and the **stacktrace** of any + exception that may be raised by the user function. + + In addition, a job notifies scenario or sequence subscribers on its status change. Attributes: id (str): The identifier of this job. 
@@ -52,9 +60,10 @@ class Job(_Entity, _Labeled): not. status (Status^): The current status of this job. creation_date (datetime): The date of this job's creation. - stacktrace (List[str]): The list of stacktraces of the exceptions raised during the execution. - version (str): The string indicates the application version of the job to instantiate. If not provided, - the latest version is used. + stacktrace (List[str]): The list of stacktraces of the exceptions raised during the + execution. + version (str): The string indicates the application version of the job to instantiate. + If not provided, the latest version is used. """ _MANAGER_NAME = "job" diff --git a/taipy/core/sequence/sequence.py b/taipy/core/sequence/sequence.py index 68d7a88c1a..5262e7e7af 100644 --- a/taipy/core/sequence/sequence.py +++ b/taipy/core/sequence/sequence.py @@ -37,8 +37,79 @@ class Sequence(_Entity, Submittable, _Labeled): - """List of `Task^`s and additional attributes representing a set of data processing - elements connected as a direct acyclic graph. + """A subset of scenario tasks grouped to be executed together independently of the others. + + A sequence is attached to a `Scenario^`. It represents a subset of its tasks that need to + be executed together, independently of the other tasks in the scenario. They must form a + connected subgraph of the scenario's task graph. A scenario can hold multiple sequences. + + For instance, in a typical machine learning scenario, we may have several sequences: + a sequence dedicated to preprocessing and preparing data, a sequence for computing a + training model, and a sequence dedicated to scoring. + + !!! Example + + Let's assume we have a scenario configuration modelling a manufacturer that is + training an ML model, predicting sales forecasts, and finally, based on + the forecasts, planning its production. Three tasks are configured and linked + together through data nodes. 
+ + ![sequences](../refmans/img/sequences.svg){ align=left } + + First, the sales sequence (boxed in green in the picture) contains **training** + and **predict** tasks. Second, a production sequence (boxed in dark gray in the + picture) contains the **planning** task. + + This problem has been modeled in two sequences - one sequence for the forecasting + part and one for the production planning part. As a consequence, the two algorithms + can have two different life cycles. They can run independently, under different + schedules. For example, one on a fixed schedule (e.g. every week) and one on demand, + interactively triggered by end-users. + + ```python + import taipy as tp + from taipy import Config + + def training(history): + ... + + def predict(model, month): + ... + + def planning(forecast, capacity): + ... + + # Configure data nodes + sales_history_cfg = Config.configure_csv_data_node("sales_history") + trained_model_cfg = Config.configure_data_node("trained_model") + current_month_cfg = Config.configure_data_node("current_month") + forecasts_cfg = Config.configure_data_node("sales_predictions") + capacity_cfg = Config.configure_data_node("capacity") + production_orders_cfg = Config.configure_sql_data_node("production_orders") + + # Configure tasks and scenarios + train_cfg = Config.configure_task("train", function=training, input=sales_history_cfg, output=trained_model_cfg) + predict_cfg = Config.configure_task("predict", function=predict, + input=[trained_model_cfg, current_month_cfg], + output=forecasts_cfg) + plan_cfg = Config.configure_task("planning", function=planning, + input=[forecasts_cfg, capacity_cfg], + output=production_orders_cfg) + scenario_cfg = Config.configure_scenario("scenario", task_configs=[train_cfg, predict_cfg, plan_cfg]) + + # Create a new scenario and sequences + scenario = tp.create_scenario(scenario_cfg) + scenario.add_sequence("sales_sequence", [train_cfg, predict_cfg]) + scenario.add_sequence("production_sequence", 
[plan_cfg]) + + # Get all sequences + all_sequences = tp.get_sequences() + + # Submit one sequence only + tp.submit(scenario.sales_sequence) + ``` + + Note that the sequences are not necessarily disjoint and may share some tasks. Attributes: properties (dict[str, Any]): A dictionary of additional properties. diff --git a/taipy/core/submission/submission.py b/taipy/core/submission/submission.py index 6e4793d74f..e755e1cd1c 100644 --- a/taipy/core/submission/submission.py +++ b/taipy/core/submission/submission.py @@ -26,7 +26,14 @@ class Submission(_Entity, _Labeled): - """Hold the jobs and submission status when a Scenario^, Sequence^ or Task^ is submitted. + """Submission of a submittable entity: a `Task^`, a `Sequence^` or a `Scenario^`. + + Task, Sequence, and Scenario entities can be submitted for execution. The submission + represents the unique request to execute a submittable entity. The submission is created + at the time the entity is submitted. + + The submission holds the jobs created by the execution of the submittable and the + `SubmissionStatus^`. The status is updated live by Taipy during the execution of the jobs. Attributes: entity_id (str): The identifier of the entity that was submitted. @@ -37,6 +44,34 @@ class Submission(_Entity, _Labeled): submission_status (Optional[SubmissionStatus]): The current status of this submission. version (Optional[str]): The string indicates the application version of the submission to instantiate. If not provided, the latest version is used. + + !!! 
example + + ```python + import taipy as tp + from taipy import Config + + def by_two(x: int): + return x * 2 + + # Configure scenarios + input_cfg = Config.configure_data_node("my_input") + result_cfg = Config.configure_data_node("my_result") + task_cfg = Config.configure_task("my_double", function=by_two, input=input_cfg, output=result_cfg) + scenario_cfg = Config.configure_scenario("my_scenario", task_configs=[task_cfg]) + + # Create a new scenario from the configuration + scenario = tp.create_scenario(scenario_cfg) + + # Write the input data and submit the scenario + scenario.my_input.write(3) + submission = scenario.submit() + + # Retrieve the list of jobs, the submission status, and the creation date + jobs = submission.jobs + status = submission.submission_status + creation_date = submission.creation_date + ``` """ _ID_PREFIX = "SUBMISSION" diff --git a/taipy/core/task/task.py b/taipy/core/task/task.py index 67f44655a6..cdb9884acc 100644 --- a/taipy/core/task/task.py +++ b/taipy/core/task/task.py @@ -30,8 +30,8 @@ class Task(_Entity, _Labeled): """Hold a user function that will be executed, its parameters and the results. - A `Task` brings together the user code as function, the inputs and the outputs as data nodes - (instances of the `DataNode^` class). + A `Task` brings together the user code as function, the inputs and the outputs + as data nodes (instances of the `DataNode^` class). !!! note It is not recommended to instantiate a `Task` directly. Instead, it should be @@ -39,6 +39,9 @@ class Task(_Entity, _Labeled): the related data nodes and tasks are created automatically. Please refer to the `Scenario^` class for more information. + A task's attributes (the input data nodes, the output data nodes, the Python + function) are populated based on its task configuration `TaskConfig^`. + !!! Example ```python