[GSoC] Update tune API for LLM hyperparameters optimization #2393

Merged
Changes from all commits (89 commits)
2a882d7
update tune api for llm hyperparameters optimization
helenxie-bit Jul 21, 2024
0c3e067
resolve conflict
helenxie-bit Jul 21, 2024
158c8f3
resolve conflict
helenxie-bit Jul 21, 2024
f4a0d4e
fix the problem of dependency
helenxie-bit Jul 21, 2024
7e7dd56
fix the format of import statement
helenxie-bit Jul 21, 2024
62ad385
adjust the blank lines
helenxie-bit Jul 21, 2024
3f36740
delete the trainer to reuse it in Training Operator
helenxie-bit Jul 22, 2024
9d20253
update constants
helenxie-bit Jul 22, 2024
dfbe793
update metrics format
helenxie-bit Jul 25, 2024
290a249
update the type of and
helenxie-bit Jul 29, 2024
aba2606
update the message of 'ImportError'
helenxie-bit Jul 29, 2024
eaf0193
add TODO of PVC creation
helenxie-bit Jul 29, 2024
62355a2
update the name of pvc
helenxie-bit Jul 29, 2024
7b2b40e
reuse constants from Training Operator
helenxie-bit Jul 29, 2024
acd1dcf
keep 'parameters' and update validation
helenxie-bit Jul 30, 2024
10b057d
update for test
helenxie-bit Jul 31, 2024
5a87eb0
reuse 'get_container_spec' and 'get_pod_template_spec' from Training …
helenxie-bit Aug 7, 2024
8387e67
resolve conflicts
helenxie-bit Aug 7, 2024
71605b4
format with black
helenxie-bit Aug 7, 2024
35acedb
fix Lint error
helenxie-bit Aug 7, 2024
af534b3
fix Lint errors
helenxie-bit Aug 7, 2024
c7f6e10
delete types
helenxie-bit Aug 7, 2024
9fdbdb7
fix format
helenxie-bit Aug 7, 2024
ddd5153
update format
helenxie-bit Aug 7, 2024
b31e820
update format
helenxie-bit Aug 7, 2024
dad3831
fix e2e test error
helenxie-bit Aug 7, 2024
1afe56d
add TODO
helenxie-bit Aug 8, 2024
ad7bce8
format with max line length
helenxie-bit Aug 8, 2024
7e58c94
format docstring
helenxie-bit Aug 8, 2024
61dc8ca
update format
helenxie-bit Aug 8, 2024
ba0d7d1
add helper functions
helenxie-bit Aug 8, 2024
2a1b008
update format
helenxie-bit Aug 8, 2024
b368521
update format
helenxie-bit Aug 8, 2024
3ccbdf9
run test again
helenxie-bit Aug 12, 2024
64e34e0
run test again
helenxie-bit Aug 12, 2024
dde724c
run test again
helenxie-bit Aug 12, 2024
1cccd4a
fix dict substitution in training_parameters
helenxie-bit Aug 14, 2024
510661d
fix typo
helenxie-bit Aug 17, 2024
f03c5ba
Merge remote-tracking branch 'origin/master' into helenxie/update_tun…
helenxie-bit Aug 18, 2024
f6b15a2
resolve conflicts and add check for case of no parameters
helenxie-bit Aug 18, 2024
6a3e046
fix format
helenxie-bit Aug 18, 2024
25541b9
fix format
helenxie-bit Aug 18, 2024
99e74d1
fix format
helenxie-bit Aug 18, 2024
96cf99c
fix flake8 error
helenxie-bit Aug 18, 2024
c568806
fix format
helenxie-bit Aug 18, 2024
6f65253
fix format
helenxie-bit Aug 18, 2024
ad17ac9
fix format
helenxie-bit Aug 18, 2024
9a1e2df
fix format
helenxie-bit Aug 18, 2024
dd12cc2
fix format
helenxie-bit Aug 19, 2024
160065a
update isort file to black and fix typo
helenxie-bit Aug 21, 2024
48a3ee0
modify the set of metrics format
helenxie-bit Aug 21, 2024
0f8a8ef
update tune API
helenxie-bit Aug 21, 2024
3bc3d87
add types.TrainerResources class
helenxie-bit Aug 21, 2024
4f6fc35
fix flake8 error
helenxie-bit Aug 21, 2024
038aeda
rerun tests
helenxie-bit Aug 22, 2024
62a6682
rerun tests
helenxie-bit Aug 23, 2024
d7dd567
resolve conflict
helenxie-bit Aug 23, 2024
95dfddd
resolve conflict
helenxie-bit Aug 23, 2024
7dbd0f5
Merge remote-tracking branch 'upstream/master' into helenxie/update_t…
helenxie-bit Aug 23, 2024
fe39051
rerun tests
helenxie-bit Aug 23, 2024
d20ea35
rerun tests
helenxie-bit Aug 23, 2024
ef27bf6
rerun tests
helenxie-bit Aug 23, 2024
466ca39
rerun tests
helenxie-bit Aug 23, 2024
741df8a
rerun tests
helenxie-bit Aug 23, 2024
e131636
rerun tests
helenxie-bit Aug 23, 2024
fe1348f
rerun tests
helenxie-bit Aug 23, 2024
2484e49
rerun tests
helenxie-bit Aug 23, 2024
f0453b0
rerun tests
helenxie-bit Aug 23, 2024
64ccbc7
rerun tests
helenxie-bit Aug 23, 2024
1ad05e6
delete properties of 'TrainerResources'
helenxie-bit Aug 27, 2024
1b054ac
fix format error
helenxie-bit Aug 27, 2024
5394113
update types
helenxie-bit Aug 29, 2024
dc3a104
fix format
helenxie-bit Aug 29, 2024
dc007b1
add import of 'TrainerResources' in '__init__.py' of katib
helenxie-bit Aug 29, 2024
3d7c9c2
rerun tests
helenxie-bit Aug 29, 2024
96db205
revert changes and rerun tests
helenxie-bit Aug 29, 2024
1a56c07
check pvc and pv status of katib deployments
helenxie-bit Aug 29, 2024
da2b6e0
check pvc and pv status of katib deployments
helenxie-bit Aug 29, 2024
970a592
recommit changes
helenxie-bit Aug 29, 2024
e529ec4
update minikube version when setup
helenxie-bit Aug 29, 2024
17f9dea
delete the code that disables formatting for the tune function
helenxie-bit Aug 30, 2024
1a2c1ad
update according to andrey's feedback
helenxie-bit Aug 30, 2024
5494925
add helper function in utils
helenxie-bit Aug 30, 2024
e1e710e
fix format
helenxie-bit Aug 30, 2024
c2df967
rerun tests
helenxie-bit Aug 30, 2024
9f69329
move metrics_collector_spec back & update helper functions & add retu…
helenxie-bit Aug 30, 2024
2374386
rerun tests
helenxie-bit Aug 30, 2024
233b582
fix some typos
helenxie-bit Aug 30, 2024
faa0f7f
simplify the definition of 'TrainerResources'
helenxie-bit Sep 2, 2024
2 changes: 1 addition & 1 deletion .github/workflows/template-setup-e2e-test/action.yaml
@@ -37,7 +37,7 @@ runs:
version: ${{ inputs.kubernetes-version }}

- name: Setup Minikube Cluster
uses: medyagh/setup-minikube@v0.0.16
uses: medyagh/setup-minikube@v0.0.18
with:
network-plugin: cni
cni: flannel
4 changes: 4 additions & 0 deletions hack/gen-python-sdk/post_gen.py
@@ -41,6 +41,10 @@ def _rewrite_helper(input_file, output_file, rewrite_rules):
if output_file == "sdk/python/v1beta1/kubeflow/katib/__init__.py":
lines.append("# Import Katib API client.\n")
lines.append("from kubeflow.katib.api.katib_client import KatibClient\n")
lines.append("# Import Katib TrainerResources class.\n")
lines.append(
"from kubeflow.katib.types.trainer_resources import TrainerResources\n"
)
lines.append("# Import Katib report metrics functions\n")
lines.append("from kubeflow.katib.api.report_metrics import report_metrics\n")
lines.append("# Import Katib helper functions.\n")
2 changes: 2 additions & 0 deletions sdk/python/v1beta1/kubeflow/katib/__init__.py
@@ -71,6 +71,8 @@

# Import Katib API client.
from kubeflow.katib.api.katib_client import KatibClient
# Import Katib TrainerResources class.
from kubeflow.katib.types.trainer_resources import TrainerResources
# Import Katib report metrics functions
from kubeflow.katib.api.report_metrics import report_metrics
# Import Katib helper functions.
567 changes: 419 additions & 148 deletions sdk/python/v1beta1/kubeflow/katib/api/katib_client.py

Large diffs are not rendered by default.
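
The tune API changes themselves live in this file, which GitHub does not render inline. The sketch below is an assumption-laden illustration of how the updated API is meant to be called for LLM hyperparameter optimization; argument names such as model_provider_parameters, dataset_provider_parameters, trainer_parameters, and resources_per_trial are inferred from the commit messages and the helper code elsewhere in this PR, not read from the rendered diff.

    # Illustrative sketch only; argument names are assumptions, not the rendered diff.
    import kubeflow.katib as katib
    import transformers
    from kubeflow.storage_initializer.hugging_face import (
        HuggingFaceDatasetParams,
        HuggingFaceModelParams,
        HuggingFaceTrainerParams,
    )
    from peft import LoraConfig

    client = katib.KatibClient(namespace="kubeflow")
    client.tune(
        name="llm-hp-tuning-example",
        # Pre-trained model and dataset pulled by the storage initializer.
        model_provider_parameters=HuggingFaceModelParams(
            model_uri="hf://google/bert_uncased_L-2_H-128_A-2",
            transformer_type=transformers.AutoModelForSequenceClassification,
        ),
        dataset_provider_parameters=HuggingFaceDatasetParams(repo_id="yelp_review_full"),
        # Hyperparameters to search are embedded in the HuggingFace trainer config.
        trainer_parameters=HuggingFaceTrainerParams(
            training_parameters=transformers.TrainingArguments(
                output_dir="results",
                learning_rate=katib.search.double(min=1e-05, max=5e-05),
            ),
            lora_config=LoraConfig(r=katib.search.int(min=8, max=32)),
        ),
        objective_metric_name="train_loss",
        objective_type="minimize",
        # Per-trial compute, using the new TrainerResources type added below.
        resources_per_trial=katib.TrainerResources(
            num_workers=2,
            num_procs_per_worker=1,
            resources_per_worker={"gpu": 1, "cpu": 4, "memory": "10G"},
        ),
    )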

5 changes: 5 additions & 0 deletions sdk/python/v1beta1/kubeflow/katib/constants/constants.py
@@ -60,3 +60,8 @@
BASE_IMAGE_MXNET = "docker.io/mxnet/python:1.9.1_native_py3"

DEFAULT_DB_MANAGER_ADDRESS = "katib-db-manager.kubeflow:6789"

# The default value for dataset and model storage PVC.
PVC_DEFAULT_SIZE = "10Gi"
# The default value for PVC access modes.
PVC_DEFAULT_ACCESS_MODES = ["ReadWriteOnce", "ReadOnlyMany"]
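
For illustration, here is a minimal sketch (not the code in katib_client.py, and the claim name is made up) of how these defaults could be applied with the Kubernetes client when building the dataset and model storage PVC:

    # Hypothetical usage of the new PVC defaults; the claim name is illustrative.
    from kubernetes import client

    from kubeflow.katib.constants import constants

    pvc_spec = client.V1PersistentVolumeClaim(
        api_version="v1",
        kind="PersistentVolumeClaim",
        metadata=client.V1ObjectMeta(name="tune-example-storage"),
        spec=client.V1PersistentVolumeClaimSpec(
            access_modes=constants.PVC_DEFAULT_ACCESS_MODES,
            resources=client.V1ResourceRequirements(
                requests={"storage": constants.PVC_DEFAULT_SIZE}
            ),
        ),
    )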
10 changes: 10 additions & 0 deletions sdk/python/v1beta1/kubeflow/katib/types/trainer_resources.py
@@ -0,0 +1,10 @@
class TrainerResources(object):
def __init__(
self,
num_workers=None,
num_procs_per_worker=None,
resources_per_worker=None,
):
self.num_workers = num_workers
self.num_procs_per_worker = num_procs_per_worker
self.resources_per_worker = resources_per_worker
142 changes: 140 additions & 2 deletions sdk/python/v1beta1/kubeflow/katib/utils/utils.py
@@ -12,15 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import inspect
import json
import logging
import os
import textwrap
from typing import Any, Callable
from typing import Any, Callable, Dict, List, Optional, Union

from kubeflow.katib import models
from kubeflow.katib.constants import constants

logger = logging.getLogger(__name__)


def is_running_in_k8s():
return os.path.isdir("/var/run/secrets/kubernetes.io/")
@@ -85,7 +89,6 @@ def validate_metrics_value(value: Any):


def validate_objective_function(objective: Callable):

# Check if objective function is callable.
if not callable(objective):
raise ValueError(
@@ -129,3 +132,138 @@ class FakeResponse:

def __init__(self, obj):
self.data = json.dumps(obj)


class SetEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, set):
return list(obj)
if isinstance(obj, type):
return obj.__name__
return json.JSONEncoder.default(self, obj)


def get_trial_substitutions_from_dict(
parameters: Dict[str, Any],
experiment_params: List[models.V1beta1ParameterSpec],
trial_params: List[models.V1beta1TrialParameterSpec],
) -> Dict[str, str]:
for p_name, p_value in parameters.items():
# If input parameter value is Katib Experiment parameter sample.
if isinstance(p_value, models.V1beta1ParameterSpec):
# Wrap value for the function input.
parameters[p_name] = f"${{trialParameters.{p_name}}}"

# Add value to the Katib Experiment parameters.
p_value.name = p_name
experiment_params.append(p_value)

# Add value to the Katib Experiment's Trial parameters.
trial_params.append(
models.V1beta1TrialParameterSpec(name=p_name, reference=p_name)
)
else:
# Otherwise, add value to the function input.
parameters[p_name] = p_value

return parameters


def get_trial_substitutions_from_trainer(
parameters: Union["TrainingArguments", "LoraConfig"], # noqa: F821
experiment_params: List[models.V1beta1ParameterSpec],
trial_params: List[models.V1beta1TrialParameterSpec],
) -> Dict[str, str]:
from peft import LoraConfig # noqa: F401
from transformers import TrainingArguments # noqa: F401

if isinstance(parameters, TrainingArguments):
parameters_dict = parameters.to_dict()
else:
parameters_dict = parameters.__dict__

for p_name, p_value in parameters_dict.items():
if not hasattr(parameters, p_name):
logger.warning(f"Training parameter {p_name} is not supported.")
continue

if isinstance(p_value, models.V1beta1ParameterSpec):
old_attr = getattr(parameters, p_name, None)
if old_attr is not None:
value = f"${{trialParameters.{p_name}}}"
setattr(parameters, p_name, value)
p_value.name = p_name
experiment_params.append(p_value)
trial_params.append(
models.V1beta1TrialParameterSpec(name=p_name, reference=p_name)
)
elif p_value is not None:
old_attr = getattr(parameters, p_name, None)
if old_attr is not None:
if isinstance(p_value, dict):
# Update the existing dictionary without nesting
value = copy.deepcopy(p_value)
else:
value = type(old_attr)(p_value)
setattr(parameters, p_name, value)

if isinstance(parameters, TrainingArguments):
parameters = json.dumps(parameters.to_dict())
else:
parameters = json.dumps(parameters.__dict__, cls=SetEncoder)

return parameters


def get_exec_script_from_objective(
objective: Callable,
input_params: Dict[str, Any] = None,
packages_to_install: Optional[List[str]] = None,
pip_index_url: str = "https://pypi.org/simple",
) -> str:
"""
Get executable script for container args from the given objective function and parameters.
"""
# Validate objective function.
validate_objective_function(objective)

# Extract objective function implementation.
objective_code = inspect.getsource(objective)

# Objective function might be defined in some indented scope
# (e.g. in another function). We need to dedent the function code.
objective_code = textwrap.dedent(objective_code)

# Wrap objective function to execute it from the file. For example:
# def objective(parameters):
# print(f'Parameters are {parameters}')
# objective({
# 'lr': '${trialParameters.lr}',
# 'epochs': '${trialParameters.epochs}',
# 'is_dist': False
# })
objective_code = f"{objective_code}\n{objective.__name__}({input_params})\n"

# Prepare execute script template.
exec_script = textwrap.dedent(
"""
program_path=$(mktemp -d)
read -r -d '' SCRIPT << EOM\n
{objective_code}
EOM
printf "%s" "$SCRIPT" > $program_path/ephemeral_objective.py
python3 -u $program_path/ephemeral_objective.py"""
)

# Add objective code to the execute script.
exec_script = exec_script.format(objective_code=objective_code)

# Install Python packages if that is required.
if packages_to_install is not None:
exec_script = (
get_script_for_python_packages(packages_to_install, pip_index_url)
+ exec_script
)

# Return executable script to execute objective function.
return exec_script
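
As a usage illustration (not part of this diff), the new helpers can be chained as follows; the objective function, search range, and package list are made up for the example:

    # Illustrative only: wire a simple objective through the new utils helpers.
    from kubeflow.katib import models
    from kubeflow.katib.utils import utils

    def objective(parameters):
        # A real objective would train a model; here we just print a metric
        # in the "name=value" format that Katib's stdout collector parses.
        print(f"loss={parameters['epochs']}")

    experiment_params = []
    trial_params = []

    # Replace Katib parameter specs with ${trialParameters.*} placeholders and
    # collect the corresponding Experiment and Trial parameter definitions.
    input_params = utils.get_trial_substitutions_from_dict(
        parameters={
            "lr": models.V1beta1ParameterSpec(
                parameter_type="double",
                feasible_space=models.V1beta1FeasibleSpace(min="0.01", max="0.1"),
            ),
            "epochs": 2,
        },
        experiment_params=experiment_params,
        trial_params=trial_params,
    )

    # Build the shell script that the trial container will execute.
    exec_script = utils.get_exec_script_from_objective(
        objective=objective,
        input_params=input_params,
        packages_to_install=["numpy"],
    )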
3 changes: 3 additions & 0 deletions sdk/python/v1beta1/setup.py
@@ -85,4 +85,7 @@
"Topic :: Software Development :: Libraries :: Python Modules",
],
install_requires=REQUIRES,
extras_require={
"huggingface": ["kubeflow-training[huggingface]==1.8.0"],
},
)
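
With this extra in place, the LLM tuning dependencies can be installed with pip install kubeflow-katib[huggingface], which pulls in kubeflow-training[huggingface]==1.8.0 from the Training Operator SDK.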