From bb2b58a013fd82a4494fc3301d43c088e9242d41 Mon Sep 17 00:00:00 2001
From: Andrey Velichkevich
Date: Fri, 22 Sep 2023 15:27:33 +0100
Subject: [PATCH] [SDK] Consolidate Naming for CRUD APIs (#1907)

* Add Flake and Black Lint
* Change SDK APIs
* Update E2E tests
* Fix a few function parameters
* Fix black format
* Fix a few comments
* Fix conftest location
* Fix Job kind in tests
* Fix client creation in test
* Fix namespace arg in get_job_conditions
* Update SDK examples with the latest changes
* Rename SDK examples
* Fix black action
* Update checkout action version

Co-authored-by: Yuki Iwai

* Use Black 23.9.1 version
* Fix GitHub Action for Black
* Add unit test to create PyTorchJob from func
* Rename timeout to wait_timeout
* Validate that Job is not set with other input parameters
* Update black in developer guide
* Remove pip_index_url validation
* Use locals to verify input
* Print Job info when E2E fails
* Remove duplicated delete

---------

Co-authored-by: Yuki Iwai
---
 .flake8                                       |    2 +
 .gcloudignore                                 |    7 -
 .github/workflows/test-python.yaml            |   23 +
 .pylintrc                                     |  405 ----
 .style.yapf                                   |    4 -
 docs/development/developer_guide.md           |   14 +-
 .../sdk/create-pytorchjob-from-func.ipynb     |  742 +++++++
 .../sdk/create-pytorchjob.ipynb               |  207 +-
 examples/sdk/create-tfjob.ipynb               |  405 ++++
 hack/python-sdk/post_gen.py                   |    5 +-
 .../create-pytorchjob-from-func.ipynb         |  779 --------
 sdk/python/examples/kubeflow-tfjob-sdk.ipynb  |  714 -------
 sdk/python/kubeflow/__init__.py               |    2 +-
 sdk/python/kubeflow/training/__init__.py      |    1 +
 .../kubeflow/training/api/training_client.py  | 1715 +++++------------
 .../kubeflow/training/constants/constants.py  |   88 +-
 sdk/python/kubeflow/training/utils/utils.py   |  386 ++--
 sdk/python/setup.py                           |    8 +-
 sdk/python/test/conftest.py                   |    1 +
 sdk/python/test/e2e/constants.py              |    5 +-
 sdk/python/test/e2e/test_e2e_mpijob.py        |  126 +-
 sdk/python/test/e2e/test_e2e_mxjob.py         |  150 +-
 sdk/python/test/e2e/test_e2e_paddlejob.py     |  109 +-
 sdk/python/test/e2e/test_e2e_pytorchjob.py    |  132 +-
 sdk/python/test/e2e/test_e2e_tfjob.py         |  105 +-
 sdk/python/test/e2e/test_e2e_xgboostjob.py    |  125 +-
 sdk/python/test/e2e/utils.py                  |   82 +-
 27 files changed, 2529 insertions(+), 3813 deletions(-)
 create mode 100644 .flake8
 delete mode 100644 .gcloudignore
 create mode 100644 .github/workflows/test-python.yaml
 delete mode 100644 .pylintrc
 delete mode 100644 .style.yapf
 create mode 100644 examples/sdk/create-pytorchjob-from-func.ipynb
 rename sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb => examples/sdk/create-pytorchjob.ipynb (65%)
 create mode 100644 examples/sdk/create-tfjob.ipynb
 delete mode 100644 sdk/python/examples/create-pytorchjob-from-func.ipynb
 delete mode 100644 sdk/python/examples/kubeflow-tfjob-sdk.ipynb

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000000..7da1f9608e
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 100
diff --git a/.gcloudignore b/.gcloudignore
deleted file mode 100644
index bee692559b..0000000000
--- a/.gcloudignore
+++ /dev/null
@@ -1,7 +0,0 @@
-# This file specifies files that are not uploaded to GCP
-# Uses same syntax as gitignore
-
-.git
-.gitignore
-#!include:.gitignore
-#vendor
diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml
new file mode 100644
index 0000000000..09dc39d9cb
--- /dev/null
+++ b/.github/workflows/test-python.yaml
@@ -0,0 +1,23 @@
+name: Python Test
+
+on:
+  - push
+  - pull_request
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
# TODO (andreyvelich): We need to replace this action with script to do + # linting and formatting for Training Operator SDK. + - name: Check Python code with Black + uses: psf/black@stable + with: + version: 23.9.1 + options: --check --exclude '/*kubeflow_org_v1*|__init__.py|api_client.py|configuration.py|exceptions.py|rest.py' + src: sdk/ diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 72f0baa622..0000000000 --- a/.pylintrc +++ /dev/null @@ -1,405 +0,0 @@ -[MASTER] - -# Specify a configuration file. -#rcfile= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Add files or directories to the ignorelist. They should be base names, not -# paths. -ignore=third_party,jupyterhub_spawner.py,dist_mnist.py - -# Add files or directories matching the regex patterns to the ignorelist. The -# regex matches against base names, not paths. -# Ignore files that are created automatically during SDK generation. -ignore-patterns=api_client.py,configuration.py,v1_job_condition.py,v1_job_status.py,v1_replica_spec.py,v1_replica_status.py,v1_tf_job.py,v1_tf_job_list.py,v1_tf_job_spec.py,v1_time.py,rest.py,test_v1_job_status.py,test_v1_replica_spec.py,test_v1_replica_status.py,test_v1_tf_job.py,test_v1_tf_job_list.py,test_v1_tf_job_spec.py,test_v1_job_condition.py - -# Pickle collected data for later comparisons. -persistent=no - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Use multiple processes to speed up Pylint. -jobs=4 - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -#enable= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -# -# Kubeflow disables wrong-import-order error because we were getting different results regarding third party -# modules when running locally vs in our test infrastructure. It looks like what counts -# as third party might vary. 
-# -disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals,wrong-import-order - - -[REPORTS] - -# Set the output format. Available formats are text, parseable, colorized, msvs -# (visual studio) and html. You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Put messages in a separate file for each module / package specified on the -# command line instead of printing them on stdout. Reports (if any) will be -# written in a file name "pylint_global.[txt|html]". This option is deprecated -# and it will be removed in Pylint 2.0. -files-output=no - -# Tells whether to display a full report or only the messages -reports=no - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - - -[BASIC] - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. 
-property-classes=abc.abstractproperty - -# Regular expression matching correct function names -function-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for function names -function-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct variable names -variable-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for variable names -variable-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct attribute names -attr-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for attribute names -attr-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct argument names -argument-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for argument names -argument-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct method names -method-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for method names -method-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - - -[ELIF] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[TYPECHECK] - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - - -[FORMAT] - -# Maximum number of characters on a single line. -max-line-length=100 - -# Regexp for a line that is allowed to be longer than the limit. 
-ignore-long-lines=^\s*(# )??$ - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Maximum number of lines in a module -max-module-lines=1000 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -# Use 2 spaces consistent with TensorFlow style. -indent-string=' ' - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[VARIABLES] - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[SIMILARITIES] - -# Minimum lines number of a similarity. -min-similarity-lines=4 - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[IMPORTS] - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=regsub,TERMIOS,Bastion,rexec - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Analyse import fallback blocks. 
This can be used to support both Python 2 and
-# 3 compatible code, which means that the block might have code that exists
-# only in one or another interpreter, leading to false positives when analysed.
-analyse-fallback-blocks=no
-
-
-[DESIGN]
-
-# Maximum number of arguments for function / method
-max-args=7
-
-# Argument names that match this expression will be ignored. Default to name
-# with leading underscore
-ignored-argument-names=_.*
-
-# Maximum number of locals for function / method body
-max-locals=15
-
-# Maximum number of return / yield for function / method body
-max-returns=6
-
-# Maximum number of branch for function / method body
-max-branches=12
-
-# Maximum number of statements in function / method body
-max-statements=50
-
-# Maximum number of parents for a class (see R0901).
-max-parents=7
-
-# Maximum number of attributes for a class (see R0902).
-max-attributes=7
-
-# Minimum number of public methods for a class (see R0903).
-min-public-methods=0
-
-# Maximum number of public methods for a class (see R0904).
-max-public-methods=20
-
-# Maximum number of boolean expressions in a if statement
-max-bool-expr=5
-
-
-[CLASSES]
-
-# List of method names used to declare (i.e. assign) instance attributes.
-defining-attr-methods=__init__,__new__,setUp
-
-# List of valid names for the first argument in a class method.
-valid-classmethod-first-arg=cls
-
-# List of valid names for the first argument in a metaclass class method.
-valid-metaclass-classmethod-first-arg=mcs
-
-# List of member names, which should be excluded from the protected access
-# warning.
-exclude-protected=_asdict,_fields,_replace,_source,_make
-
-
-[EXCEPTIONS]
-
-# Exceptions that will emit a warning when being caught. Defaults to
-# "Exception"
-overgeneral-exceptions=Exception
diff --git a/.style.yapf b/.style.yapf
deleted file mode 100644
index 9668b4b10a..0000000000
--- a/.style.yapf
+++ /dev/null
@@ -1,4 +0,0 @@
-[style]
-based_on_style = google
-indent_width: 2
-continuation_indent_width: 2
diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md
index 8e89d866a7..406399467e 100644
--- a/docs/development/developer_guide.md
+++ b/docs/development/developer_guide.md
@@ -112,16 +112,16 @@ sdk/python/kubeflow/training/api
 
 ### Python
 
-- Use [yapf](https://github.com/google/yapf) to format Python code
-- `yapf` style is configured in `.style.yapf` file
-- To autoformat code
+- Use [`black`](https://github.com/psf/black) to format Python code
 
-  ```sh
-  yapf -i py/**/*.py
+- Run the following to install `black`:
+
+  ```
+  pip install black==23.9.1
   ```
 
-- To sort imports
+- To check your code:
 
   ```sh
-  isort path/to/module.py
+  black --check --exclude '/*kubeflow_org_v1*|__init__.py|api_client.py|configuration.py|exceptions.py|rest.py' sdk/
   ```
diff --git a/examples/sdk/create-pytorchjob-from-func.ipynb b/examples/sdk/create-pytorchjob-from-func.ipynb
new file mode 100644
index 0000000000..5ab0cf21c2
--- /dev/null
+++ b/examples/sdk/create-pytorchjob-from-func.ipynb
@@ -0,0 +1,742 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "90d43b56-97e5-45e2-8e67-4488ed31d2df",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Run PyTorchJob From Function\n",
+    "\n",
+    "In this Notebook we are going to create a [Kubeflow PyTorchJob](https://www.kubeflow.org/docs/components/training/pytorch/).\n",
+    "\n",
+    "The PyTorchJob will run distributed training using the [DistributedDataParallel strategy](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8bb6564-fde3-4c28-841c-012122643dd9",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Install Kubeflow Python SDKs\n",
+    "\n",
+    "You need to install PyTorch packages and Kubeflow SDKs to run this Notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d49f072e-2221-48bb-9f6d-561713d1a45c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install torch==1.12.1\n",
+    "!pip install torchvision==0.13.1\n",
+    "\n",
+    "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n",
+    "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e9331a05-9127-4b3a-8077-31157e267827",
+   "metadata": {},
+   "source": [
+    "## Create Train Script for CNN Model\n",
+    "\n",
+    "This is a simple **Convolutional Neural Network (CNN)** model for recognizing different pictures of clothing using the [Fashion MNIST Dataset](https://github.com/zalandoresearch/fashion-mnist)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "69f21f33-5c64-452c-90c4-977fc0dadb3b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def train_pytorch_model():\n",
+    "    import logging\n",
+    "    import os\n",
+    "    from torchvision import transforms, datasets\n",
+    "    import torch\n",
+    "    from torch import nn\n",
+    "    import torch.nn.functional as F\n",
+    "    import torch.distributed as dist\n",
+    "\n",
+    "    logging.basicConfig(\n",
+    "        format=\"%(asctime)s %(levelname)-8s %(message)s\",\n",
+    "        datefmt=\"%Y-%m-%dT%H:%M:%SZ\",\n",
+    "        level=logging.DEBUG,\n",
+    "    )\n",
+    "\n",
+    "    # Create PyTorch CNN Model.\n",
+    "    class Net(nn.Module):\n",
+    "        def __init__(self):\n",
+    "            super(Net, self).__init__()\n",
+    "            self.conv1 = nn.Conv2d(1, 20, 5, 1)\n",
+    "            self.conv2 = nn.Conv2d(20, 50, 5, 1)\n",
+    "            self.fc1 = nn.Linear(4 * 4 * 50, 500)\n",
+    "            self.fc2 = nn.Linear(500, 10)\n",
+    "\n",
+    "        def forward(self, x):\n",
+    "            x = F.relu(self.conv1(x))\n",
+    "            x = F.max_pool2d(x, 2, 2)\n",
+    "            x = F.relu(self.conv2(x))\n",
+    "            x = F.max_pool2d(x, 2, 2)\n",
+    "            x = x.view(-1, 4 * 4 * 50)\n",
+    "            x = F.relu(self.fc1(x))\n",
+    "            x = self.fc2(x)\n",
+    "            return F.log_softmax(x, dim=1)\n",
+    "\n",
+    "    # Get dist parameters.\n",
+    "    # Kubeflow Training Operator automatically sets the appropriate RANK and WORLD_SIZE based on the configuration.\n",
+    "    RANK = int(os.environ[\"RANK\"])\n",
+    "    WORLD_SIZE = int(os.environ[\"WORLD_SIZE\"])\n",
+    "\n",
+    "    model = Net()\n",
+    "    # Attach model to DistributedDataParallel strategy.\n",
+    "    dist.init_process_group(backend=\"gloo\", rank=RANK, world_size=WORLD_SIZE)\n",
+    "    Distributor = nn.parallel.DistributedDataParallel\n",
+    "    model = Distributor(model)\n",
+    "\n",
+    "    # Split batch size for each worker.\n",
+    "    batch_size = int(128 / WORLD_SIZE)\n",
+    "\n",
+    "    # Get Fashion MNIST DataSet.\n",
+    "    train_loader = torch.utils.data.DataLoader(\n",
+    "        datasets.FashionMNIST(\n",
+    "            \"./data\",\n",
+    "            train=True,\n",
+    "            download=True,\n",
+    "            transform=transforms.Compose([transforms.ToTensor()]),\n",
+    "        ),\n",
+    "        batch_size=batch_size,\n",
+    "    )\n",
+    "\n",
+    "    # Start Training.\n",
+    "    logging.info(f\"Start training for RANK: {RANK}. 
WORLD_SIZE: {WORLD_SIZE}\")\n", + " for epoch in range(1):\n", + " model.train()\n", + " optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)\n", + "\n", + " for batch_idx, (data, target) in enumerate(train_loader):\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " if batch_idx % 10 == 0:\n", + " logging.info(\n", + " \"Train Epoch: {} [{}/{} ({:.0f}%)]\\tloss={:.4f}\".format(\n", + " epoch,\n", + " batch_idx * len(data),\n", + " len(train_loader.dataset),\n", + " 100.0 * batch_idx / len(train_loader),\n", + " loss.item(),\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "8cfe8739-1f94-476a-80e3-dd6e3237d9ed", + "metadata": { + "execution": { + "iopub.execute_input": "2022-09-01T19:32:37.813779Z", + "iopub.status.busy": "2022-09-01T19:32:37.812759Z", + "iopub.status.idle": "2022-09-01T19:32:37.827050Z", + "shell.execute_reply": "2022-09-01T19:32:37.825186Z", + "shell.execute_reply.started": "2022-09-01T19:32:37.813690Z" + } + }, + "source": [ + "## Run Training Locally in the Notebook\n", + "\n", + "We are going to download Fashion MNIST Dataset and start local training." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9e2c6fd8-d0ba-4bc6-ac90-d4cf09751ace", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/avelichk/miniconda3/envs/training-operator/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-09-08T22:00:27Z INFO Added key: store_based_barrier_key:1 to store for rank: 0\n", + "2023-09-08T22:00:27Z INFO Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 1 nodes.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26421880/26421880 [00:01<00:00, 22627052.40it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29515/29515 [00:00<00:00, 1596941.21it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "Downloading 
http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4422102/4422102 [00:00<00:00, 20494516.72it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5148/5148 [00:00<00:00, 8510948.76it/s]\n", + "2023-09-08T22:00:30Z INFO Start training for RANK: 0. WORLD_SIZE: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-08T22:00:30Z INFO Train Epoch: 0 [0/60000 (0%)]\tloss=2.2989\n", + "2023-09-08T22:00:30Z INFO Reducer buckets have been rebuilt in this iteration.\n", + "2023-09-08T22:00:30Z INFO Train Epoch: 0 [1280/60000 (2%)]\tloss=2.2991\n", + "2023-09-08T22:00:30Z INFO Train Epoch: 0 [2560/60000 (4%)]\tloss=2.2857\n", + "2023-09-08T22:00:31Z INFO Train Epoch: 0 [3840/60000 (6%)]\tloss=2.2795\n", + "2023-09-08T22:00:31Z INFO Train Epoch: 0 [5120/60000 (9%)]\tloss=2.2625\n", + "2023-09-08T22:00:31Z INFO Train Epoch: 0 [6400/60000 (11%)]\tloss=2.2526\n", + "2023-09-08T22:00:32Z INFO Train Epoch: 0 [7680/60000 (13%)]\tloss=2.2245\n", + "2023-09-08T22:00:32Z INFO Train Epoch: 0 [8960/60000 (15%)]\tloss=2.1893\n", + "2023-09-08T22:00:32Z INFO Train Epoch: 0 [10240/60000 (17%)]\tloss=2.1507\n", + "2023-09-08T22:00:33Z INFO Train Epoch: 0 [11520/60000 (19%)]\tloss=2.0778\n", + "2023-09-08T22:00:33Z INFO Train Epoch: 0 [12800/60000 (21%)]\tloss=1.9957\n", + "2023-09-08T22:00:34Z INFO Train Epoch: 0 [14080/60000 (23%)]\tloss=1.9257\n", + "2023-09-08T22:00:34Z INFO Train Epoch: 0 [15360/60000 (26%)]\tloss=1.7212\n", + "2023-09-08T22:00:34Z INFO Train Epoch: 0 [16640/60000 (28%)]\tloss=1.5281\n", + "2023-09-08T22:00:35Z INFO Train Epoch: 0 [17920/60000 (30%)]\tloss=1.3686\n", + "2023-09-08T22:00:35Z INFO Train Epoch: 0 [19200/60000 (32%)]\tloss=1.2350\n", + "2023-09-08T22:00:35Z INFO Train Epoch: 0 [20480/60000 (34%)]\tloss=1.1473\n", + "2023-09-08T22:00:36Z INFO Train Epoch: 0 [21760/60000 (36%)]\tloss=1.1870\n", + "2023-09-08T22:00:36Z INFO Train Epoch: 0 [23040/60000 (38%)]\tloss=1.0766\n", + "2023-09-08T22:00:36Z INFO Train Epoch: 0 [24320/60000 (41%)]\tloss=1.0574\n", + "2023-09-08T22:00:37Z INFO Train Epoch: 0 [25600/60000 (43%)]\tloss=0.9557\n", + "2023-09-08T22:00:37Z INFO Train Epoch: 0 [26880/60000 (45%)]\tloss=0.9279\n", + "2023-09-08T22:00:37Z INFO Train Epoch: 0 [28160/60000 (47%)]\tloss=0.9588\n", + "2023-09-08T22:00:38Z INFO 
Train Epoch: 0 [29440/60000 (49%)]\tloss=1.0970\n",
+      "2023-09-08T22:00:38Z INFO Train Epoch: 0 [30720/60000 (51%)]\tloss=0.9617\n",
+      "2023-09-08T22:00:38Z INFO Train Epoch: 0 [32000/60000 (53%)]\tloss=0.9025\n",
+      "2023-09-08T22:00:39Z INFO Train Epoch: 0 [33280/60000 (55%)]\tloss=0.8363\n",
+      "2023-09-08T22:00:39Z INFO Train Epoch: 0 [34560/60000 (58%)]\tloss=0.9448\n",
+      "2023-09-08T22:00:39Z INFO Train Epoch: 0 [35840/60000 (60%)]\tloss=0.7507\n",
+      "2023-09-08T22:00:40Z INFO Train Epoch: 0 [37120/60000 (62%)]\tloss=0.7347\n",
+      "2023-09-08T22:00:40Z INFO Train Epoch: 0 [38400/60000 (64%)]\tloss=0.6985\n",
+      "2023-09-08T22:00:40Z INFO Train Epoch: 0 [39680/60000 (66%)]\tloss=0.8104\n",
+      "2023-09-08T22:00:41Z INFO Train Epoch: 0 [40960/60000 (68%)]\tloss=0.8177\n",
+      "2023-09-08T22:00:41Z INFO Train Epoch: 0 [42240/60000 (70%)]\tloss=0.8442\n",
+      "2023-09-08T22:00:41Z INFO Train Epoch: 0 [43520/60000 (72%)]\tloss=0.7311\n",
+      "2023-09-08T22:00:42Z INFO Train Epoch: 0 [44800/60000 (75%)]\tloss=0.7861\n",
+      "2023-09-08T22:00:42Z INFO Train Epoch: 0 [46080/60000 (77%)]\tloss=0.7879\n",
+      "2023-09-08T22:00:42Z INFO Train Epoch: 0 [47360/60000 (79%)]\tloss=0.7863\n",
+      "2023-09-08T22:00:43Z INFO Train Epoch: 0 [48640/60000 (81%)]\tloss=0.8808\n",
+      "2023-09-08T22:00:43Z INFO Train Epoch: 0 [49920/60000 (83%)]\tloss=0.7993\n",
+      "2023-09-08T22:00:43Z INFO Train Epoch: 0 [51200/60000 (85%)]\tloss=0.7540\n",
+      "2023-09-08T22:00:44Z INFO Train Epoch: 0 [52480/60000 (87%)]\tloss=0.8387\n",
+      "2023-09-08T22:00:44Z INFO Train Epoch: 0 [53760/60000 (90%)]\tloss=0.7911\n",
+      "2023-09-08T22:00:44Z INFO Train Epoch: 0 [55040/60000 (92%)]\tloss=0.6176\n",
+      "2023-09-08T22:00:45Z INFO Train Epoch: 0 [56320/60000 (94%)]\tloss=0.6854\n",
+      "2023-09-08T22:00:45Z INFO Train Epoch: 0 [57600/60000 (96%)]\tloss=0.7593\n",
+      "2023-09-08T22:00:45Z INFO Train Epoch: 0 [58880/60000 (98%)]\tloss=0.7477\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Set dist env variables to run the above training locally on the Notebook.\n",
+    "import os\n",
+    "os.environ[\"RANK\"] = \"0\"\n",
+    "os.environ[\"WORLD_SIZE\"] = \"1\"\n",
+    "os.environ[\"MASTER_ADDR\"] = \"localhost\"\n",
+    "os.environ[\"MASTER_PORT\"] = \"1234\"\n",
+    "\n",
+    "# Train Model locally in the Notebook.\n",
+    "train_pytorch_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5aae47e3-be31-468e-8f38-89e1e2f1c764",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Start Distributed Training with PyTorchJob\n",
+    "\n",
+    "Before creating the PyTorchJob, you have to create a `TrainingClient()`. It uses the [Kubernetes Python client](https://github.com/kubernetes-client/python) to communicate with the Kubernetes API server. You can set the path and context for [the kubeconfig file](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/). The default location for the kubeconfig is `~/.kube/config`.\n",
+    "\n",
+    "Kubeflow Training Operator automatically sets the appropriate env variables (`MASTER_PORT`, `MASTER_ADDR`, `WORLD_SIZE`, `RANK`) for each PyTorchJob container."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "eb1acd34-ebcf-409b-8bb3-0225cee37110",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-09-08T22:01:42Z INFO PyTorchJob default/train-pytorch has been created\n"
+     ]
+    }
+   ],
+   "source": [
+    "from kubeflow.training import TrainingClient\n",
+    "from kubeflow.training import constants\n",
+    "\n",
+    "# Start PyTorchJob Training.\n",
+    "pytorchjob_name = \"train-pytorch\"\n",
+    "\n",
+    "# Since we set `job_kind = PyTorchJob`, the APIs are going to use PyTorchJob as the default Job kind.\n",
+    "training_client = TrainingClient(job_kind=constants.PYTORCHJOB_KIND)\n",
+    "\n",
+    "training_client.create_job(\n",
+    "    name=pytorchjob_name,\n",
+    "    train_func=train_pytorch_model,\n",
+    "    num_worker_replicas=3, # How many PyTorch Workers will be created.\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e44c3ad7-62c4-4b58-b52a-15fd8746b772",
+   "metadata": {},
+   "source": [
+    "### Check PyTorchJob Status\n",
+    "\n",
+    "Use `TrainingClient` APIs to get information about the created PyTorchJob."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "4141f6c2-c38f-4972-b68a-35d150ef7485",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PyTorchJob Status: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"PyTorchJob Status: {training_client.is_job_running(name=pytorchjob_name)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "42e10587-7ac2-45bf-9c4f-d418e1585974",
+   "metadata": {},
+   "source": [
+    "### Get PyTorchJob Pod Names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "49b53308-a19b-45e8-942f-4333e727ee48",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['train-pytorch-master-0',\n",
+       " 'train-pytorch-worker-0',\n",
+       " 'train-pytorch-worker-1',\n",
+       " 'train-pytorch-worker-2']"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "training_client.get_job_pod_names(pytorchjob_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b91d332d-487c-4a95-937d-26ffb6199cda",
+   "metadata": {
+    "execution": {
+     "iopub.status.busy": "2022-09-01T20:10:25.759950Z",
+     "iopub.status.idle": "2022-09-01T20:10:25.760581Z",
+     "shell.execute_reply": "2022-09-01T20:10:25.760353Z",
+     "shell.execute_reply.started": "2022-09-01T20:10:25.760328Z"
+    },
+    "tags": []
+   },
+   "source": [
+    "### Get PyTorchJob Training Logs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5232d542-d4bf-4c51-8b11-ad0534fb0b9d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-09-08T22:10:08Z INFO The logs of pod train-pytorch-master-0:\n",
+      " 2023-09-08T21:01:59Z INFO Added key: store_based_barrier_key:1 to store for rank: 0\n",
+      "2023-09-08T21:01:59Z INFO Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.\n",
+      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n",
+      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n",
+      "100%|██████████| 26421880/26421880 [00:02<00:00, 12793779.84it/s]\n",
+      "Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n",
+      "\n",
+      "Downloading 
http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n", + "100%|██████████| 29515/29515 [00:00<00:00, 209261.44it/s]\n", + "Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n", + "100%|██████████| 4422102/4422102 [00:01<00:00, 3953124.28it/s]\n", + "Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", + "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n", + "100%|██████████| 5148/5148 [00:00<00:00, 53182948.26it/s]\n", + "Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", + "\n", + "2023-09-08T21:02:05Z INFO Start training for RANK: 0. WORLD_SIZE: 4\n", + "2023-09-08T21:02:05Z INFO Train Epoch: 0 [0/60000 (0%)]\tloss=2.3026\n", + "2023-09-08T21:02:05Z INFO Reducer buckets have been rebuilt in this iteration.\n", + "2023-09-08T21:02:07Z INFO Train Epoch: 0 [320/60000 (1%)]\tloss=2.2942\n", + "2023-09-08T21:02:10Z INFO Train Epoch: 0 [640/60000 (1%)]\tloss=2.2931\n", + "2023-09-08T21:02:12Z INFO Train Epoch: 0 [960/60000 (2%)]\tloss=2.2750\n", + "2023-09-08T21:02:14Z INFO Train Epoch: 0 [1280/60000 (2%)]\tloss=2.2734\n", + "2023-09-08T21:02:17Z INFO Train Epoch: 0 [1600/60000 (3%)]\tloss=2.2644\n", + "2023-09-08T21:02:19Z INFO Train Epoch: 0 [1920/60000 (3%)]\tloss=2.2451\n", + "2023-09-08T21:02:21Z INFO Train Epoch: 0 [2240/60000 (4%)]\tloss=2.1874\n", + "2023-09-08T21:02:23Z INFO Train Epoch: 0 [2560/60000 (4%)]\tloss=2.2048\n", + "2023-09-08T21:02:25Z INFO Train Epoch: 0 [2880/60000 (5%)]\tloss=2.1906\n", + "2023-09-08T21:02:26Z INFO Train Epoch: 0 [3200/60000 (5%)]\tloss=2.1379\n", + "2023-09-08T21:02:29Z INFO Train Epoch: 0 [3520/60000 (6%)]\tloss=2.0556\n", + "2023-09-08T21:02:31Z INFO Train Epoch: 0 [3840/60000 (6%)]\tloss=1.8509\n", + "2023-09-08T21:02:32Z INFO Train Epoch: 0 [4160/60000 (7%)]\tloss=1.6425\n", + "2023-09-08T21:02:34Z INFO Train Epoch: 0 [4480/60000 (7%)]\tloss=1.6744\n", + "2023-09-08T21:02:36Z INFO Train Epoch: 0 [4800/60000 (8%)]\tloss=1.3866\n", + "2023-09-08T21:02:38Z INFO Train Epoch: 0 [5120/60000 (9%)]\tloss=1.2680\n", + "2023-09-08T21:02:41Z INFO Train Epoch: 0 [5440/60000 (9%)]\tloss=1.2594\n", + "2023-09-08T21:02:43Z INFO Train Epoch: 0 [5760/60000 (10%)]\tloss=1.3052\n", + "2023-09-08T21:02:45Z INFO Train Epoch: 0 [6080/60000 (10%)]\tloss=1.1057\n", + "2023-09-08T21:02:47Z INFO Train Epoch: 0 [6400/60000 (11%)]\tloss=1.0416\n", + "2023-09-08T21:02:49Z INFO Train Epoch: 0 [6720/60000 (11%)]\tloss=1.2431\n", + "2023-09-08T21:02:50Z INFO Train Epoch: 0 [7040/60000 (12%)]\tloss=0.9392\n", + "2023-09-08T21:02:53Z INFO Train Epoch: 0 [7360/60000 (12%)]\tloss=0.9794\n", + "2023-09-08T21:02:55Z INFO Train Epoch: 0 [7680/60000 (13%)]\tloss=0.9787\n", + "2023-09-08T21:02:57Z INFO Train Epoch: 0 [8000/60000 (13%)]\tloss=1.2992\n", + "2023-09-08T21:02:59Z INFO Train Epoch: 0 [8320/60000 
(14%)]\tloss=1.0311\n", + "2023-09-08T21:03:01Z INFO Train Epoch: 0 [8640/60000 (14%)]\tloss=1.0544\n", + "2023-09-08T21:03:02Z INFO Train Epoch: 0 [8960/60000 (15%)]\tloss=1.1326\n", + "2023-09-08T21:03:04Z INFO Train Epoch: 0 [9280/60000 (15%)]\tloss=0.6292\n", + "2023-09-08T21:03:06Z INFO Train Epoch: 0 [9600/60000 (16%)]\tloss=1.2502\n", + "2023-09-08T21:03:08Z INFO Train Epoch: 0 [9920/60000 (17%)]\tloss=0.8754\n", + "2023-09-08T21:03:10Z INFO Train Epoch: 0 [10240/60000 (17%)]\tloss=1.0590\n", + "2023-09-08T21:03:13Z INFO Train Epoch: 0 [10560/60000 (18%)]\tloss=1.0957\n", + "2023-09-08T21:03:15Z INFO Train Epoch: 0 [10880/60000 (18%)]\tloss=0.9105\n", + "2023-09-08T21:03:17Z INFO Train Epoch: 0 [11200/60000 (19%)]\tloss=0.6360\n", + "2023-09-08T21:03:19Z INFO Train Epoch: 0 [11520/60000 (19%)]\tloss=0.9720\n", + "2023-09-08T21:03:21Z INFO Train Epoch: 0 [11840/60000 (20%)]\tloss=1.1181\n", + "2023-09-08T21:03:23Z INFO Train Epoch: 0 [12160/60000 (20%)]\tloss=0.9001\n", + "2023-09-08T21:03:25Z INFO Train Epoch: 0 [12480/60000 (21%)]\tloss=0.6984\n", + "2023-09-08T21:03:27Z INFO Train Epoch: 0 [12800/60000 (21%)]\tloss=0.7768\n", + "2023-09-08T21:03:30Z INFO Train Epoch: 0 [13120/60000 (22%)]\tloss=1.1038\n", + "2023-09-08T21:03:32Z INFO Train Epoch: 0 [13440/60000 (22%)]\tloss=0.8548\n", + "2023-09-08T21:03:34Z INFO Train Epoch: 0 [13760/60000 (23%)]\tloss=0.8793\n", + "2023-09-08T21:03:37Z INFO Train Epoch: 0 [14080/60000 (23%)]\tloss=0.8937\n", + "2023-09-08T21:03:39Z INFO Train Epoch: 0 [14400/60000 (24%)]\tloss=0.8367\n", + "2023-09-08T21:03:42Z INFO Train Epoch: 0 [14720/60000 (25%)]\tloss=0.6917\n", + "2023-09-08T21:03:45Z INFO Train Epoch: 0 [15040/60000 (25%)]\tloss=0.8002\n", + "2023-09-08T21:03:47Z INFO Train Epoch: 0 [15360/60000 (26%)]\tloss=0.9557\n", + "2023-09-08T21:03:48Z INFO Train Epoch: 0 [15680/60000 (26%)]\tloss=0.7246\n", + "2023-09-08T21:03:50Z INFO Train Epoch: 0 [16000/60000 (27%)]\tloss=1.0920\n", + "2023-09-08T21:03:52Z INFO Train Epoch: 0 [16320/60000 (27%)]\tloss=0.4943\n", + "2023-09-08T21:03:54Z INFO Train Epoch: 0 [16640/60000 (28%)]\tloss=0.9251\n", + "2023-09-08T21:03:55Z INFO Train Epoch: 0 [16960/60000 (28%)]\tloss=0.6982\n", + "2023-09-08T21:03:58Z INFO Train Epoch: 0 [17280/60000 (29%)]\tloss=0.7784\n", + "2023-09-08T21:04:00Z INFO Train Epoch: 0 [17600/60000 (29%)]\tloss=0.6317\n", + "2023-09-08T21:04:02Z INFO Train Epoch: 0 [17920/60000 (30%)]\tloss=0.6022\n", + "2023-09-08T21:04:04Z INFO Train Epoch: 0 [18240/60000 (30%)]\tloss=1.1098\n", + "2023-09-08T21:04:06Z INFO Train Epoch: 0 [18560/60000 (31%)]\tloss=1.1230\n", + "2023-09-08T21:04:08Z INFO Train Epoch: 0 [18880/60000 (31%)]\tloss=0.7113\n", + "2023-09-08T21:04:10Z INFO Train Epoch: 0 [19200/60000 (32%)]\tloss=0.5611\n", + "2023-09-08T21:04:12Z INFO Train Epoch: 0 [19520/60000 (33%)]\tloss=0.8134\n", + "2023-09-08T21:04:14Z INFO Train Epoch: 0 [19840/60000 (33%)]\tloss=0.8513\n", + "2023-09-08T21:04:16Z INFO Train Epoch: 0 [20160/60000 (34%)]\tloss=1.1050\n", + "2023-09-08T21:04:18Z INFO Train Epoch: 0 [20480/60000 (34%)]\tloss=0.5541\n", + "2023-09-08T21:04:20Z INFO Train Epoch: 0 [20800/60000 (35%)]\tloss=0.9637\n", + "2023-09-08T21:04:22Z INFO Train Epoch: 0 [21120/60000 (35%)]\tloss=0.4796\n", + "2023-09-08T21:04:24Z INFO Train Epoch: 0 [21440/60000 (36%)]\tloss=0.9878\n", + "2023-09-08T21:04:26Z INFO Train Epoch: 0 [21760/60000 (36%)]\tloss=0.6691\n", + "2023-09-08T21:04:28Z INFO Train Epoch: 0 [22080/60000 (37%)]\tloss=0.7739\n", + "2023-09-08T21:04:31Z INFO Train Epoch: 0 
[22400/60000 (37%)]\tloss=0.5405\n", + "2023-09-08T21:04:32Z INFO Train Epoch: 0 [22720/60000 (38%)]\tloss=0.6155\n", + "2023-09-08T21:04:35Z INFO Train Epoch: 0 [23040/60000 (38%)]\tloss=1.0303\n", + "2023-09-08T21:04:37Z INFO Train Epoch: 0 [23360/60000 (39%)]\tloss=0.5421\n", + "2023-09-08T21:04:39Z INFO Train Epoch: 0 [23680/60000 (39%)]\tloss=0.7717\n", + "2023-09-08T21:04:41Z INFO Train Epoch: 0 [24000/60000 (40%)]\tloss=0.8697\n", + "2023-09-08T21:04:43Z INFO Train Epoch: 0 [24320/60000 (41%)]\tloss=0.7996\n", + "2023-09-08T21:04:44Z INFO Train Epoch: 0 [24640/60000 (41%)]\tloss=0.6494\n", + "2023-09-08T21:04:46Z INFO Train Epoch: 0 [24960/60000 (42%)]\tloss=0.7669\n", + "2023-09-08T21:04:48Z INFO Train Epoch: 0 [25280/60000 (42%)]\tloss=0.4775\n", + "2023-09-08T21:04:50Z INFO Train Epoch: 0 [25600/60000 (43%)]\tloss=0.7363\n", + "2023-09-08T21:04:51Z INFO Train Epoch: 0 [25920/60000 (43%)]\tloss=0.5954\n", + "2023-09-08T21:04:53Z INFO Train Epoch: 0 [26240/60000 (44%)]\tloss=0.9329\n", + "2023-09-08T21:04:55Z INFO Train Epoch: 0 [26560/60000 (44%)]\tloss=0.7000\n", + "2023-09-08T21:04:57Z INFO Train Epoch: 0 [26880/60000 (45%)]\tloss=0.5993\n", + "2023-09-08T21:04:59Z INFO Train Epoch: 0 [27200/60000 (45%)]\tloss=0.9582\n", + "2023-09-08T21:05:01Z INFO Train Epoch: 0 [27520/60000 (46%)]\tloss=0.4871\n", + "2023-09-08T21:05:03Z INFO Train Epoch: 0 [27840/60000 (46%)]\tloss=0.6944\n", + "2023-09-08T21:05:06Z INFO Train Epoch: 0 [28160/60000 (47%)]\tloss=0.7795\n", + "2023-09-08T21:05:08Z INFO Train Epoch: 0 [28480/60000 (47%)]\tloss=0.7967\n", + "2023-09-08T21:05:10Z INFO Train Epoch: 0 [28800/60000 (48%)]\tloss=0.9489\n", + "2023-09-08T21:05:12Z INFO Train Epoch: 0 [29120/60000 (49%)]\tloss=0.6331\n", + "2023-09-08T21:05:14Z INFO Train Epoch: 0 [29440/60000 (49%)]\tloss=0.9203\n", + "2023-09-08T21:05:16Z INFO Train Epoch: 0 [29760/60000 (50%)]\tloss=0.7250\n", + "2023-09-08T21:05:18Z INFO Train Epoch: 0 [30080/60000 (50%)]\tloss=1.0080\n", + "2023-09-08T21:05:20Z INFO Train Epoch: 0 [30400/60000 (51%)]\tloss=0.6063\n", + "2023-09-08T21:05:23Z INFO Train Epoch: 0 [30720/60000 (51%)]\tloss=0.6403\n", + "2023-09-08T21:05:24Z INFO Train Epoch: 0 [31040/60000 (52%)]\tloss=0.4953\n", + "2023-09-08T21:05:26Z INFO Train Epoch: 0 [31360/60000 (52%)]\tloss=0.4997\n", + "2023-09-08T21:05:28Z INFO Train Epoch: 0 [31680/60000 (53%)]\tloss=0.7053\n", + "2023-09-08T21:05:30Z INFO Train Epoch: 0 [32000/60000 (53%)]\tloss=0.7847\n", + "2023-09-08T21:05:32Z INFO Train Epoch: 0 [32320/60000 (54%)]\tloss=0.5874\n", + "2023-09-08T21:05:34Z INFO Train Epoch: 0 [32640/60000 (54%)]\tloss=0.6826\n", + "2023-09-08T21:05:36Z INFO Train Epoch: 0 [32960/60000 (55%)]\tloss=0.5787\n", + "2023-09-08T21:05:39Z INFO Train Epoch: 0 [33280/60000 (55%)]\tloss=0.5482\n", + "2023-09-08T21:05:41Z INFO Train Epoch: 0 [33600/60000 (56%)]\tloss=0.5237\n", + "2023-09-08T21:05:42Z INFO Train Epoch: 0 [33920/60000 (57%)]\tloss=0.4103\n", + "2023-09-08T21:05:44Z INFO Train Epoch: 0 [34240/60000 (57%)]\tloss=0.4330\n", + "2023-09-08T21:05:46Z INFO Train Epoch: 0 [34560/60000 (58%)]\tloss=0.3828\n", + "2023-09-08T21:05:48Z INFO Train Epoch: 0 [34880/60000 (58%)]\tloss=0.6742\n", + "2023-09-08T21:05:49Z INFO Train Epoch: 0 [35200/60000 (59%)]\tloss=0.5098\n", + "2023-09-08T21:05:51Z INFO Train Epoch: 0 [35520/60000 (59%)]\tloss=0.5187\n", + "2023-09-08T21:05:53Z INFO Train Epoch: 0 [35840/60000 (60%)]\tloss=0.5226\n", + "2023-09-08T21:05:54Z INFO Train Epoch: 0 [36160/60000 (60%)]\tloss=0.7099\n", + "2023-09-08T21:05:56Z INFO Train 
Epoch: 0 [36480/60000 (61%)]\tloss=0.6922\n", + "2023-09-08T21:05:59Z INFO Train Epoch: 0 [36800/60000 (61%)]\tloss=0.6208\n", + "2023-09-08T21:06:01Z INFO Train Epoch: 0 [37120/60000 (62%)]\tloss=0.7056\n", + "2023-09-08T21:06:03Z INFO Train Epoch: 0 [37440/60000 (62%)]\tloss=0.5346\n", + "2023-09-08T21:06:05Z INFO Train Epoch: 0 [37760/60000 (63%)]\tloss=0.4693\n", + "2023-09-08T21:06:07Z INFO Train Epoch: 0 [38080/60000 (63%)]\tloss=0.8529\n", + "2023-09-08T21:06:10Z INFO Train Epoch: 0 [38400/60000 (64%)]\tloss=0.6755\n", + "2023-09-08T21:06:11Z INFO Train Epoch: 0 [38720/60000 (65%)]\tloss=0.5663\n", + "2023-09-08T21:06:13Z INFO Train Epoch: 0 [39040/60000 (65%)]\tloss=0.5107\n", + "2023-09-08T21:06:15Z INFO Train Epoch: 0 [39360/60000 (66%)]\tloss=0.4245\n", + "2023-09-08T21:06:17Z INFO Train Epoch: 0 [39680/60000 (66%)]\tloss=0.5797\n", + "2023-09-08T21:06:19Z INFO Train Epoch: 0 [40000/60000 (67%)]\tloss=0.5011\n", + "2023-09-08T21:06:20Z INFO Train Epoch: 0 [40320/60000 (67%)]\tloss=0.4641\n", + "2023-09-08T21:06:22Z INFO Train Epoch: 0 [40640/60000 (68%)]\tloss=0.2431\n", + "2023-09-08T21:06:24Z INFO Train Epoch: 0 [40960/60000 (68%)]\tloss=0.5040\n", + "2023-09-08T21:06:26Z INFO Train Epoch: 0 [41280/60000 (69%)]\tloss=0.6674\n", + "2023-09-08T21:06:29Z INFO Train Epoch: 0 [41600/60000 (69%)]\tloss=0.8426\n", + "2023-09-08T21:06:31Z INFO Train Epoch: 0 [41920/60000 (70%)]\tloss=0.5418\n", + "2023-09-08T21:06:33Z INFO Train Epoch: 0 [42240/60000 (70%)]\tloss=0.6396\n", + "2023-09-08T21:06:35Z INFO Train Epoch: 0 [42560/60000 (71%)]\tloss=0.4182\n", + "2023-09-08T21:06:38Z INFO Train Epoch: 0 [42880/60000 (71%)]\tloss=0.7471\n", + "2023-09-08T21:06:40Z INFO Train Epoch: 0 [43200/60000 (72%)]\tloss=0.6492\n", + "2023-09-08T21:06:42Z INFO Train Epoch: 0 [43520/60000 (73%)]\tloss=0.3955\n", + "2023-09-08T21:06:44Z INFO Train Epoch: 0 [43840/60000 (73%)]\tloss=0.5986\n", + "2023-09-08T21:06:46Z INFO Train Epoch: 0 [44160/60000 (74%)]\tloss=0.5604\n", + "2023-09-08T21:06:48Z INFO Train Epoch: 0 [44480/60000 (74%)]\tloss=0.4396\n", + "2023-09-08T21:06:50Z INFO Train Epoch: 0 [44800/60000 (75%)]\tloss=0.5718\n", + "2023-09-08T21:06:52Z INFO Train Epoch: 0 [45120/60000 (75%)]\tloss=0.5190\n", + "2023-09-08T21:06:54Z INFO Train Epoch: 0 [45440/60000 (76%)]\tloss=0.7500\n", + "2023-09-08T21:06:56Z INFO Train Epoch: 0 [45760/60000 (76%)]\tloss=0.4298\n", + "2023-09-08T21:06:58Z INFO Train Epoch: 0 [46080/60000 (77%)]\tloss=0.5909\n", + "2023-09-08T21:07:00Z INFO Train Epoch: 0 [46400/60000 (77%)]\tloss=0.4499\n", + "2023-09-08T21:07:02Z INFO Train Epoch: 0 [46720/60000 (78%)]\tloss=0.6639\n", + "2023-09-08T21:07:05Z INFO Train Epoch: 0 [47040/60000 (78%)]\tloss=0.3891\n", + "2023-09-08T21:07:08Z INFO Train Epoch: 0 [47360/60000 (79%)]\tloss=0.5912\n", + "2023-09-08T21:07:10Z INFO Train Epoch: 0 [47680/60000 (79%)]\tloss=0.4047\n", + "2023-09-08T21:07:12Z INFO Train Epoch: 0 [48000/60000 (80%)]\tloss=0.5517\n", + "2023-09-08T21:07:14Z INFO Train Epoch: 0 [48320/60000 (81%)]\tloss=0.5204\n", + "2023-09-08T21:07:17Z INFO Train Epoch: 0 [48640/60000 (81%)]\tloss=0.7532\n", + "2023-09-08T21:07:19Z INFO Train Epoch: 0 [48960/60000 (82%)]\tloss=0.6107\n", + "2023-09-08T21:07:20Z INFO Train Epoch: 0 [49280/60000 (82%)]\tloss=0.6882\n", + "2023-09-08T21:07:22Z INFO Train Epoch: 0 [49600/60000 (83%)]\tloss=0.3215\n", + "2023-09-08T21:07:24Z INFO Train Epoch: 0 [49920/60000 (83%)]\tloss=0.3356\n", + "2023-09-08T21:07:26Z INFO Train Epoch: 0 [50240/60000 (84%)]\tloss=0.4973\n", + "2023-09-08T21:07:28Z 
INFO Train Epoch: 0 [50560/60000 (84%)]\tloss=0.8383\n", + "2023-09-08T21:07:31Z INFO Train Epoch: 0 [50880/60000 (85%)]\tloss=0.4020\n", + "2023-09-08T21:07:32Z INFO Train Epoch: 0 [51200/60000 (85%)]\tloss=0.4866\n", + "2023-09-08T21:07:34Z INFO Train Epoch: 0 [51520/60000 (86%)]\tloss=0.4938\n", + "2023-09-08T21:07:36Z INFO Train Epoch: 0 [51840/60000 (86%)]\tloss=0.7432\n", + "2023-09-08T21:07:38Z INFO Train Epoch: 0 [52160/60000 (87%)]\tloss=0.4650\n", + "2023-09-08T21:07:40Z INFO Train Epoch: 0 [52480/60000 (87%)]\tloss=0.8149\n", + "2023-09-08T21:07:41Z INFO Train Epoch: 0 [52800/60000 (88%)]\tloss=0.5370\n", + "2023-09-08T21:07:43Z INFO Train Epoch: 0 [53120/60000 (89%)]\tloss=0.7261\n", + "2023-09-08T21:07:46Z INFO Train Epoch: 0 [53440/60000 (89%)]\tloss=0.6188\n", + "2023-09-08T21:07:48Z INFO Train Epoch: 0 [53760/60000 (90%)]\tloss=0.5179\n", + "2023-09-08T21:07:51Z INFO Train Epoch: 0 [54080/60000 (90%)]\tloss=0.7616\n", + "2023-09-08T21:07:53Z INFO Train Epoch: 0 [54400/60000 (91%)]\tloss=0.7180\n", + "2023-09-08T21:07:55Z INFO Train Epoch: 0 [54720/60000 (91%)]\tloss=0.4831\n", + "2023-09-08T21:07:56Z INFO Train Epoch: 0 [55040/60000 (92%)]\tloss=0.3719\n", + "2023-09-08T21:07:59Z INFO Train Epoch: 0 [55360/60000 (92%)]\tloss=0.4730\n", + "2023-09-08T21:08:01Z INFO Train Epoch: 0 [55680/60000 (93%)]\tloss=0.5402\n", + "2023-09-08T21:08:02Z INFO Train Epoch: 0 [56000/60000 (93%)]\tloss=0.7432\n", + "2023-09-08T21:08:04Z INFO Train Epoch: 0 [56320/60000 (94%)]\tloss=0.6275\n", + "2023-09-08T21:08:06Z INFO Train Epoch: 0 [56640/60000 (94%)]\tloss=0.3235\n", + "2023-09-08T21:08:07Z INFO Train Epoch: 0 [56960/60000 (95%)]\tloss=0.7855\n", + "2023-09-08T21:08:09Z INFO Train Epoch: 0 [57280/60000 (95%)]\tloss=0.5046\n", + "2023-09-08T21:08:11Z INFO Train Epoch: 0 [57600/60000 (96%)]\tloss=0.5732\n", + "2023-09-08T21:08:13Z INFO Train Epoch: 0 [57920/60000 (97%)]\tloss=0.2879\n", + "2023-09-08T21:08:15Z INFO Train Epoch: 0 [58240/60000 (97%)]\tloss=0.4233\n", + "2023-09-08T21:08:18Z INFO Train Epoch: 0 [58560/60000 (98%)]\tloss=0.5561\n", + "2023-09-08T21:08:20Z INFO Train Epoch: 0 [58880/60000 (98%)]\tloss=0.6785\n", + "2023-09-08T21:08:21Z INFO Train Epoch: 0 [59200/60000 (99%)]\tloss=0.3826\n", + "2023-09-08T21:08:23Z INFO Train Epoch: 0 [59520/60000 (99%)]\tloss=0.5397\n", + "2023-09-08T21:08:26Z INFO Train Epoch: 0 [59840/60000 (100%)]\tloss=0.5987\n", + "\n" + ] + } + ], + "source": [ + "training_client.get_job_logs(pytorchjob_name)" + ] + }, + { + "cell_type": "markdown", + "id": "17b0ca43-1936-4708-b03b-3ab9ac2bbdea", + "metadata": {}, + "source": [ + "## Delete PyTorchJob\n", + "\n", + "When PyTorchJob is finished, you can delete the resource." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "32ae88fd-5b5d-4ba1-a560-9a35c5ac17de", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-08T22:10:29Z INFO PyTorchJob default/train-pytorch has been deleted\n" + ] + } + ], + "source": [ + "training_client.delete_job(pytorchjob_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9641e9f-551d-44d5-872b-002fffaedcef", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb b/examples/sdk/create-pytorchjob.ipynb similarity index 65% rename from sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb rename to examples/sdk/create-pytorchjob.ipynb index 0c2c28e02d..d960043ad3 100644 --- a/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb +++ b/examples/sdk/create-pytorchjob.ipynb @@ -8,7 +8,7 @@ } }, "source": [ - "# Sample for Kubeflow PyTorchJob SDK" + "# Create PyTorchJob using Kubeflow Training SDK" ] }, { @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": { "pycharm": { "name": "#%%\n" @@ -60,11 +60,13 @@ "from kubernetes.client import V1PodSpec\n", "from kubernetes.client import V1Container\n", "\n", - "from kubeflow.training import V1ReplicaSpec\n", + "from kubeflow.training import KubeflowOrgV1ReplicaSpec\n", "from kubeflow.training import KubeflowOrgV1PyTorchJob\n", "from kubeflow.training import KubeflowOrgV1PyTorchJobSpec\n", - "from kubeflow.training import V1RunPolicy\n", - "from kubeflow.training import TrainingClient" + "from kubeflow.training import KubeflowOrgV1RunPolicy\n", + "from kubeflow.training import TrainingClient\n", + "\n", + "from kubeflow.training import constants" ] }, { @@ -91,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 3, "metadata": { "pycharm": { "name": "#%%\n" @@ -109,7 +111,7 @@ " args=[\"--backend\", \"gloo\"],\n", ")\n", "\n", - "replica_spec = V1ReplicaSpec(\n", + "replica_spec = KubeflowOrgV1ReplicaSpec(\n", " replicas=1,\n", " restart_policy=\"OnFailure\",\n", " template=V1PodTemplateSpec(\n", @@ -121,23 +123,17 @@ " }\n", " ),\n", " spec=V1PodSpec(\n", - " containers=[\n", - " V1Container(\n", - " name=container_name,\n", - " image=\"gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0\",\n", - " args=[\"--backend\", \"gloo\"],\n", - " )\n", - " ]\n", + " containers=[container]\n", " )\n", " )\n", ")\n", "\n", "pytorchjob = KubeflowOrgV1PyTorchJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"PyTorchJob\",\n", + " api_version=constants.API_VERSION,\n", + " kind=constants.PYTORCHJOB_KIND,\n", " metadata=V1ObjectMeta(name=name, namespace=namespace),\n", " spec=KubeflowOrgV1PyTorchJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " run_policy=KubeflowOrgV1RunPolicy(clean_pod_policy=\"None\"),\n", " pytorch_replica_specs={\n", " \"Master\": replica_spec,\n", " \"Worker\": replica_spec\n", @@ -156,12 +152,12 @@ "source": [ "## Create PyTorchJob\n", "\n", - "You have to create Training Client 
to deploy you PyTorchJob in you cluster."
+    "You have to create a Training Client to deploy your PyTorchJob in your cluster."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
@@ -177,8 +173,11 @@
    }
   ],
   "source": [
-    "training_client = TrainingClient()\n",
-    "training_client.create_pytorchjob(pytorchjob, namespace=namespace)"
+    "# Namespace will be reused in every API call.\n",
+    "training_client = TrainingClient(namespace=namespace)\n",
+    "\n",
+    "# If `job_kind` is not set in `TrainingClient`, we need to set it for each API.\n",
+    "training_client.create_job(pytorchjob, job_kind=constants.PYTORCHJOB_KIND)"
   ]
  },
  {
@@ -196,7 +195,7 @@
 },
 {
  "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 5,
  "metadata": {
   "pycharm": {
    "name": "#%%\n"
@@ -209,13 +208,13 @@
      "'pytorch-dist-mnist-gloo'"
     ]
    },
-    "execution_count": 39,
+    "execution_count": 5,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
-   "training_client.get_pytorchjob(name).metadata.name"
+   "training_client.get_job(name, job_kind=constants.PYTORCHJOB_KIND).metadata.name"
  ]
 },
 {
@@ -231,7 +230,7 @@
 },
 {
  "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 7,
  "metadata": {
   "pycharm": {
    "name": "#%%\n"
@@ -241,27 +240,27 @@
 {
  "data": {
   "text/plain": [
-    "[{'last_transition_time': datetime.datetime(2023, 1, 12, 18, 30, 13, tzinfo=tzlocal()),\n",
-    " 'last_update_time': datetime.datetime(2023, 1, 12, 18, 30, 13, tzinfo=tzlocal()),\n",
+    "[{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 14, 59, tzinfo=tzutc()),\n",
+    " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 14, 59, tzinfo=tzutc()),\n",
    " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is created.',\n",
    " 'reason': 'PyTorchJobCreated',\n",
    " 'status': 'True',\n",
    " 'type': 'Created'},\n",
-    " {'last_transition_time': datetime.datetime(2023, 1, 12, 18, 30, 18, tzinfo=tzlocal()),\n",
-    " 'last_update_time': datetime.datetime(2023, 1, 12, 18, 30, 18, tzinfo=tzlocal()),\n",
+    " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 15, 45, tzinfo=tzutc()),\n",
+    " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 15, 45, tzinfo=tzutc()),\n",
    " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is running.',\n",
    " 'reason': 'JobRunning',\n",
    " 'status': 'True',\n",
    " 'type': 'Running'}]"
   ]
  },
-   "execution_count": 40,
+   "execution_count": 7,
  "metadata": {},
  "output_type": "execute_result"
 }
 ],
 "source": [
-   "training_client.get_job_conditions(name=name, namespace=namespace, job_kind=\"PyTorchJob\")"
+   "training_client.get_job_conditions(name=name, job_kind=constants.PYTORCHJOB_KIND)"
 ]
 },
 {
@@ -277,27 +276,33 @@
 },
 {
  "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 8,
  "metadata": {
   "pycharm": {
    "name": "#%%\n"
   }
  },
  "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "NAME                          STATE                TIME\n",
+     "pytorch-dist-mnist-gloo       Running              2023-09-08 21:15:45+00:00\n",
+     "pytorch-dist-mnist-gloo       Running              2023-09-08 21:15:45+00:00\n",
+     "pytorch-dist-mnist-gloo       Succeeded            2023-09-08 21:26:44+00:00\n"
+    ]
+   },
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
-    "pytorch-dist-mnist-gloo Running 2023-01-12 18:30:18+00:00\n",
-    "pytorch-dist-mnist-gloo Running 2023-01-12 18:30:18+00:00\n",
-    "pytorch-dist-mnist-gloo Running 2023-01-12 18:30:18+00:00\n",
-    "pytorch-dist-mnist-gloo Succeeded 2023-01-12 18:36:48+00:00\n",
    "Succeeded number of replicas: 1\n"
   ]
  }
 ],
 "source": [
-   "pytorchjob = training_client.wait_for_job_conditions(name=name,
namespace=namespace, job_kind=\"PyTorchJob\")\n", + "pytorchjob = training_client.wait_for_job_conditions(name=name, job_kind=constants.PYTORCHJOB_KIND)\n", "\n", "print(f\"Succeeded number of replicas: {pytorchjob.status.replica_statuses['Master'].succeeded}\")" ] @@ -315,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 9, "metadata": { "pycharm": { "name": "#%%\n" @@ -328,13 +333,13 @@ "True" ] }, - "execution_count": 42, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "training_client.is_job_succeeded(name=name, namespace=namespace, job_kind=\"PyTorchJob\")" + "training_client.is_job_succeeded(name=name, job_kind=constants.PYTORCHJOB_KIND)" ] }, { @@ -350,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 10, "metadata": { "pycharm": { "name": "#%%\n" @@ -403,75 +408,75 @@ "Train Epoch: 1 [19840/60000 (33%)]\tloss=0.1191\n", "Train Epoch: 1 [20480/60000 (34%)]\tloss=0.1905\n", "Train Epoch: 1 [21120/60000 (35%)]\tloss=0.1408\n", - "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3150\n", - "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1506\n", - "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2899\n", - "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4676\n", - "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2157\n", - "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1520\n", - "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2244\n", - "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2632\n", + "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3147\n", + "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1505\n", + "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2898\n", + "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4685\n", + "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2158\n", + "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1521\n", + "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2248\n", + "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2623\n", "Train Epoch: 1 [26880/60000 (45%)]\tloss=0.2335\n", - "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2619\n", + "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2623\n", "Train Epoch: 1 [28160/60000 (47%)]\tloss=0.2126\n", - "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1324\n", - "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2795\n", - "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0951\n", - "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1284\n", - "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2461\n", - "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3394\n", - "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1517\n", + "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1328\n", + "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2779\n", + "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0943\n", + "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1285\n", + "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2455\n", + "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3396\n", + "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1523\n", "Train Epoch: 1 [33280/60000 (55%)]\tloss=0.0916\n", - "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1449\n", - "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1978\n", - "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2189\n", - "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0637\n", - "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1368\n", - "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1153\n", - "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2358\n", - "Train Epoch: 1 [38400/60000 (64%)]\tloss=0.0631\n", - "Train Epoch: 1 [39040/60000 (65%)]\tloss=0.1063\n", - "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1602\n", - "Train Epoch: 1 
[40320/60000 (67%)]\tloss=0.1098\n", - "Train Epoch: 1 [40960/60000 (68%)]\tloss=0.1781\n", - "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2297\n", - "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0735\n", - "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1562\n", - "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2771\n", - "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1429\n", - "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1172\n", - "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1202\n", + "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1448\n", + "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1989\n", + "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2183\n", + "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0638\n", + "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1373\n", + "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1147\n", + "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2355\n", + "Train Epoch: 1 [38400/60000 (64%)]\tloss=0.0636\n", + "Train Epoch: 1 [39040/60000 (65%)]\tloss=0.1065\n", + "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1599\n", + "Train Epoch: 1 [40320/60000 (67%)]\tloss=0.1090\n", + "Train Epoch: 1 [40960/60000 (68%)]\tloss=0.1774\n", + "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2307\n", + "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0736\n", + "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1553\n", + "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2793\n", + "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1428\n", + "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1179\n", + "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1205\n", "Train Epoch: 1 [46080/60000 (77%)]\tloss=0.0767\n", - "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1938\n", - "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0699\n", - "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2114\n", - "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1373\n", - "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0934\n", - "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1075\n", - "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1185\n", - "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1457\n", - "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0694\n", - "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0242\n", - "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2635\n", + "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1946\n", + "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0703\n", + "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2094\n", + "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1378\n", + "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0950\n", + "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1066\n", + "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1182\n", + "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1455\n", + "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0684\n", + "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0241\n", + "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2626\n", "Train Epoch: 1 [53760/60000 (90%)]\tloss=0.0922\n", - "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1287\n", - "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1908\n", - "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0350\n", - "Train Epoch: 1 [56320/60000 (94%)]\tloss=0.0359\n", - "Train Epoch: 1 [56960/60000 (95%)]\tloss=0.0762\n", - "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1173\n", - "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1948\n", - "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2035\n", - "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0639\n", + "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1301\n", + "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1921\n", + "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0346\n", + "Train Epoch: 1 [56320/60000 (94%)]\tloss=0.0358\n", + 
"Train Epoch: 1 [56960/60000 (95%)]\tloss=0.0767\n", + "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1167\n", + "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1932\n", + "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2062\n", + "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0647\n", "\n", - "accuracy=0.9665\n", + "accuracy=0.9669\n", "\n", "\n" ] } ], "source": [ - "training_client.get_job_logs(name=name, namespace=namespace, container=container_name)" + "training_client.get_job_logs(name=name, job_kind=constants.PYTORCHJOB_KIND)" ] }, { @@ -487,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 11, "metadata": { "pycharm": { "name": "#%%\n" @@ -503,7 +508,7 @@ } ], "source": [ - "training_client.delete_pytorchjob(name)" + "training_client.delete_job(name)" ] }, { @@ -530,7 +535,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/examples/sdk/create-tfjob.ipynb b/examples/sdk/create-tfjob.ipynb new file mode 100644 index 0000000000..182e977ea4 --- /dev/null +++ b/examples/sdk/create-tfjob.ipynb @@ -0,0 +1,405 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Create TFJob using Kubeflow Training SDK" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "This is a sample for Kubeflow Training SDK `kubeflow-training`.\n", + "\n", + "The notebook shows how to use Kubeflow TFJob SDK to create, get, wait, check and delete TFJob." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install Kubeflow Training Python SDKs\n", + "\n", + "You need to install Kubeflow Training SDK to run this Notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", + "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from kubernetes.client import V1PodTemplateSpec\n", + "from kubernetes.client import V1ObjectMeta\n", + "from kubernetes.client import V1PodSpec\n", + "from kubernetes.client import V1Container\n", + "\n", + "\n", + "from kubeflow.training import KubeflowOrgV1ReplicaSpec\n", + "from kubeflow.training import KubeflowOrgV1TFJob\n", + "from kubeflow.training import KubeflowOrgV1TFJobSpec\n", + "from kubeflow.training import KubeflowOrgV1RunPolicy\n", + "from kubeflow.training import TrainingClient\n", + "\n", + "from kubeflow.training import constants" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Define TFJob" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "The demo runs Tensorflow MNIST example with 2 workers, chief, and parameter server for TFJob." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "name = \"mnist\"\n",
+    "namespace = \"kubeflow-user-example-com\"\n",
+    "container_name = \"tensorflow\"\n",
+    "\n",
+    "container = V1Container(\n",
+    "    name=container_name,\n",
+    "    image=\"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\",\n",
+    "    command=[\n",
+    "        \"python\",\n",
+    "        \"/var/tf_mnist/mnist_with_summaries.py\",\n",
+    "        \"--log_dir=/train/logs\", \"--learning_rate=0.01\",\n",
+    "        \"--batch_size=150\"\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "worker = KubeflowOrgV1ReplicaSpec(\n",
+    "    replicas=2,\n",
+    "    restart_policy=\"Never\",\n",
+    "    template=V1PodTemplateSpec(\n",
+    "        spec=V1PodSpec(\n",
+    "            containers=[container]\n",
+    "        )\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "chief = KubeflowOrgV1ReplicaSpec(\n",
+    "    replicas=1,\n",
+    "    restart_policy=\"Never\",\n",
+    "    template=V1PodTemplateSpec(\n",
+    "        spec=V1PodSpec(\n",
+    "            containers=[container]\n",
+    "        )\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "ps = KubeflowOrgV1ReplicaSpec(\n",
+    "    replicas=1,\n",
+    "    restart_policy=\"Never\",\n",
+    "    template=V1PodTemplateSpec(\n",
+    "        spec=V1PodSpec(\n",
+    "            containers=[container]\n",
+    "        )\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "tfjob = KubeflowOrgV1TFJob(\n",
+    "    api_version=constants.API_VERSION,\n",
+    "    kind=constants.TFJOB_KIND,\n",
+    "    metadata=V1ObjectMeta(name=\"mnist\",namespace=namespace),\n",
+    "    spec=KubeflowOrgV1TFJobSpec(\n",
+    "        run_policy=KubeflowOrgV1RunPolicy(clean_pod_policy=\"None\"),\n",
+    "        tf_replica_specs={\"Worker\": worker,\n",
+    "                          \"Chief\": chief,\n",
+    "                          \"PS\": ps}\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create TFJob\n",
+    "\n",
+    "You have to create a Training Client to deploy your TFJob in your cluster."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "TFJob kubeflow-user-example-com/mnist has been created\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Namespace and Job kind will be reused in every API call.\n",
+    "training_client = TrainingClient(namespace=namespace, job_kind=constants.TFJOB_KIND)\n",
+    "training_client.create_job(tfjob)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Get the Created TFJob\n",
+    "\n",
+    "You can verify the created TFJob status.\n",
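+    "\n",
+    "If `job_kind` was not set when creating the `TrainingClient`, you can pass it explicitly on each call. A minimal sketch, reusing `name` and `training_client` from the cells above:\n",
+    "\n",
+    "```python\n",
+    "# Fetch the TFJob object and inspect its metadata (a sketch; `constants` is imported above).\n",
+    "tfjob = training_client.get_job(name, job_kind=constants.TFJOB_KIND)\n",
+    "print(tfjob.metadata.name)\n",
+    "```"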
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'completion_time': None,\n", + " 'conditions': [{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", + " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", + " 'message': 'TFJob mnist is created.',\n", + " 'reason': 'TFJobCreated',\n", + " 'status': 'True',\n", + " 'type': 'Created'},\n", + " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", + " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", + " 'message': 'TFJob kubeflow-user-example-com/mnist is running.',\n", + " 'reason': 'TFJobRunning',\n", + " 'status': 'True',\n", + " 'type': 'Running'}],\n", + " 'last_reconcile_time': None,\n", + " 'replica_statuses': {'Chief': {'active': 1,\n", + " 'failed': None,\n", + " 'label_selector': None,\n", + " 'selector': None,\n", + " 'succeeded': None},\n", + " 'PS': {'active': 1,\n", + " 'failed': None,\n", + " 'label_selector': None,\n", + " 'selector': None,\n", + " 'succeeded': None},\n", + " 'Worker': {'active': 2,\n", + " 'failed': None,\n", + " 'label_selector': None,\n", + " 'selector': None,\n", + " 'succeeded': None}},\n", + " 'start_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc())}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "training_client.get_job(name).status" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the TFJob Conditions" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", + " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", + " 'message': 'TFJob mnist is created.',\n", + " 'reason': 'TFJobCreated',\n", + " 'status': 'True',\n", + " 'type': 'Created'},\n", + " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", + " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", + " 'message': 'TFJob kubeflow-user-example-com/mnist is running.',\n", + " 'reason': 'TFJobRunning',\n", + " 'status': 'True',\n", + " 'type': 'Running'}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "training_client.get_job_conditions(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wait Until TFJob Finishes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "training_client.wait_for_job_conditions(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verify if TFJob is Succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "training_client.is_job_succeeded(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the TFJob Training Logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + 
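"# Tip: `get_job_logs` also accepts `follow=True` to stream the logs while the Job is running.\n",
+    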
"training_client.get_job_logs(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Delete the TFJob" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "training_client.delete_job(name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hack/python-sdk/post_gen.py b/hack/python-sdk/post_gen.py index ab496bfb64..07b15a4468 100755 --- a/hack/python-sdk/post_gen.py +++ b/hack/python-sdk/post_gen.py @@ -44,7 +44,7 @@ def fix_test_files() -> None: test_folder_dir = os.path.join(sdk_dir, "test") test_files = os.listdir(test_folder_dir) for test_file in test_files: - print(f"Precessing file {test_file}") + print(f"Processing file {test_file}") if test_file.endswith(".py"): with fileinput.FileInput( os.path.join(test_folder_dir, test_file), inplace=True @@ -56,8 +56,9 @@ def fix_test_files() -> None: def add_imports() -> None: with open(os.path.join(sdk_dir, "kubeflow/training/__init__.py"), "a") as f: f.write("from kubeflow.training.api.training_client import TrainingClient\n") + f.write("from kubeflow.training.constants import constants\n") with open(os.path.join(sdk_dir, "kubeflow/__init__.py"), "a") as f: - f.write("__path__ = __import__('pkgutil').extend_path(__path__, __name__)") + f.write("__path__ = __import__('pkgutil').extend_path(__path__, __name__)\n") # Add Kubernetes models to proper deserialization of Training models. with open(os.path.join(sdk_dir, "kubeflow/training/models/__init__.py"), "r") as f: diff --git a/sdk/python/examples/create-pytorchjob-from-func.ipynb b/sdk/python/examples/create-pytorchjob-from-func.ipynb deleted file mode 100644 index aaafdf8132..0000000000 --- a/sdk/python/examples/create-pytorchjob-from-func.ipynb +++ /dev/null @@ -1,779 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "90d43b56-97e5-45e2-8e67-4488ed31d2df", - "metadata": { - "tags": [] - }, - "source": [ - "# Run PyTorchJob From Function\n", - "\n", - "In this Notebook we are going to create [Kubeflow PyTorchJob](https://www.kubeflow.org/docs/components/training/pytorch/).\n", - "\n", - "The PyTorchJob will run distributive training using [DistributedDataParallel strategy](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)." - ] - }, - { - "cell_type": "markdown", - "id": "a8bb6564-fde3-4c28-841c-012122643dd9", - "metadata": { - "tags": [] - }, - "source": [ - "## Install Kubeflow Python SDKs\n", - "\n", - "You need to install PyTorch packages and Kubeflow SDKs to run this Notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d49f072e-2221-48bb-9f6d-561713d1a45c", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install torch==1.12.1\n", - "!pip install torchvision==0.13.1\n", - "\n", - "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", - "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" - ] - }, - { - "cell_type": "markdown", - "id": "e9331a05-9127-4b3a-8077-31157e267827", - "metadata": {}, - "source": [ - "## Create Train Script for CNN Model\n", - "\n", - "This is simple **Convolutional Neural Network (CNN)** model for recognizing different picture of clothing using [Fashion MNIST Dataset](https://github.com/zalandoresearch/fashion-mnist)." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "69f21f33-5c64-452c-90c4-977fc0dadb3b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def train_pytorch_model():\n", - " import logging\n", - " import os\n", - " from torchvision import transforms, datasets\n", - " import torch\n", - " from torch import nn\n", - " import torch.nn.functional as F\n", - " import torch.distributed as dist\n", - "\n", - " logging.basicConfig(\n", - " format=\"%(asctime)s %(levelname)-8s %(message)s\",\n", - " datefmt=\"%Y-%m-%dT%H:%M:%SZ\",\n", - " level=logging.DEBUG,\n", - " )\n", - "\n", - " # Create PyTorch CNN Model.\n", - " class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " self.conv1 = nn.Conv2d(1, 20, 5, 1)\n", - " self.conv2 = nn.Conv2d(20, 50, 5, 1)\n", - " self.fc1 = nn.Linear(4 * 4 * 50, 500)\n", - " self.fc2 = nn.Linear(500, 10)\n", - "\n", - " def forward(self, x):\n", - " x = F.relu(self.conv1(x))\n", - " x = F.max_pool2d(x, 2, 2)\n", - " x = F.relu(self.conv2(x))\n", - " x = F.max_pool2d(x, 2, 2)\n", - " x = x.view(-1, 4 * 4 * 50)\n", - " x = F.relu(self.fc1(x))\n", - " x = self.fc2(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - " # Get dist parameters.\n", - " # Kubeflow Training Operator automatically set appropriate RANK and WORLD_SIZE based on the configuration.\n", - " RANK = int(os.environ[\"RANK\"])\n", - " WORLD_SIZE = int(os.environ[\"WORLD_SIZE\"])\n", - " \n", - " model = Net()\n", - " # Attach model to DistributedDataParallel strategy.\n", - " dist.init_process_group(backend=\"gloo\", rank=RANK, world_size=WORLD_SIZE)\n", - " Distributor = nn.parallel.DistributedDataParallel\n", - " model = Distributor(model)\n", - "\n", - " # Split batch size for each worker.\n", - " batch_size = int(128 / WORLD_SIZE)\n", - "\n", - " # Get Fashion MNIST DataSet.\n", - " train_loader = torch.utils.data.DataLoader(\n", - " datasets.FashionMNIST(\n", - " \"./data\",\n", - " train=True,\n", - " download=True,\n", - " transform=transforms.Compose([transforms.ToTensor()]),\n", - " ),\n", - " batch_size=batch_size,\n", - " )\n", - "\n", - " # Start Training.\n", - " logging.info(f\"Start training for RANK: {RANK}. 
WORLD_SIZE: {WORLD_SIZE}\")\n", - " for epoch in range(1):\n", - " model.train()\n", - " optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)\n", - "\n", - " for batch_idx, (data, target) in enumerate(train_loader):\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = F.nll_loss(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " if batch_idx % 10 == 0:\n", - " logging.info(\n", - " \"Train Epoch: {} [{}/{} ({:.0f}%)]\\tloss={:.4f}\".format(\n", - " epoch,\n", - " batch_idx * len(data),\n", - " len(train_loader.dataset),\n", - " 100.0 * batch_idx / len(train_loader),\n", - " loss.item(),\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "8cfe8739-1f94-476a-80e3-dd6e3237d9ed", - "metadata": { - "execution": { - "iopub.execute_input": "2022-09-01T19:32:37.813779Z", - "iopub.status.busy": "2022-09-01T19:32:37.812759Z", - "iopub.status.idle": "2022-09-01T19:32:37.827050Z", - "shell.execute_reply": "2022-09-01T19:32:37.825186Z", - "shell.execute_reply.started": "2022-09-01T19:32:37.813690Z" - } - }, - "source": [ - "## Run Training Locally in the Notebook\n", - "\n", - "We are going to download Fashion MNIST Dataset and start local training." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9e2c6fd8-d0ba-4bc6-ac90-d4cf09751ace", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-09-12T18:21:28Z INFO Added key: store_based_barrier_key:1 to store for rank: 0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "851b228ae0324915882f834224abe134", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/26421880 [00:00 constants.JOB_MODELS_TYPE: + """Get the Training Job. + + Args: + name: Name for the Job. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + timeout: Kubernetes API server timeout in seconds to execute the request. + + Returns: + object: Job object. For example: KubeflowOrgV1PyTorchJob + + Raises: + TimeoutError: Timeout to get Job. + RuntimeError: Failed to get Job. 
+ """ + + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind + + if job_kind not in constants.JOB_PARAMETERS: + raise ValueError( + f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" + ) + + try: + thread = self.custom_api.get_namespaced_custom_object( + constants.GROUP, + constants.VERSION, + namespace, + constants.JOB_PARAMETERS[job_kind]["plural"], + name, + async_req=True, + ) + response = utils.FakeResponse(thread.get(timeout)) + job = self.api_client.deserialize( + response, constants.JOB_PARAMETERS[job_kind]["model"] + ) + + except multiprocessing.TimeoutError: + raise TimeoutError(f"Timeout to get {job_kind}: {namespace}/{name}") + except Exception: + raise RuntimeError(f"Failed to get {job_kind}: {namespace}/{name}") + + return job + + def list_jobs( + self, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + timeout: int = constants.DEFAULT_TIMEOUT, + ) -> List[constants.JOB_MODELS_TYPE]: + """List of all Training Jobs with specific kind in namespace. + + Args: + namespace: Namespace to list the Jobs. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + timeout: Kubernetes API server timeout in seconds to execute the request. + + Returns: + list[object]: List of Job objects. + For example: list of KubeflowOrgV1PyTorchJob objects. It returns empty list + if Jobs can't be found. + + Raises: + TimeoutError: Timeout to list Jobs + RuntimeError: Failed to list Jobs + """ + + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind + + if job_kind not in constants.JOB_PARAMETERS: + raise ValueError( + f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" + ) + + result = [] + try: + thread = self.custom_api.list_namespaced_custom_object( + constants.GROUP, + constants.VERSION, + namespace, + constants.JOB_PARAMETERS[job_kind]["plural"], + async_req=True, + ) + response = thread.get(timeout) + result = [ + self.api_client.deserialize( + utils.FakeResponse(item), + constants.JOB_PARAMETERS[job_kind]["model"], + ) + for item in response.get("items") + ] + except multiprocessing.TimeoutError: + raise TimeoutError(f"Timeout to list {job_kind}s in namespace: {namespace}") + except Exception: + raise RuntimeError(f"Failed to list {job_kind}s in namespace: {namespace}") + + return result + + def get_job_conditions( + self, + name: Optional[str] = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + job: Optional[constants.JOB_MODELS_TYPE] = None, + timeout: int = constants.DEFAULT_TIMEOUT, + ) -> List[models.V1JobCondition]: """Get the Training Job conditions. Training Job is in the condition when `status=True` for the appropriate condition `type`. For example, Training Job is Succeeded when `status=True` and `type=Succeeded`. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to get conditions. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. - job: Optionally, Training Job object can be set to get the conditions. - It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, - KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob` - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + namespace: Namespace for the Job. 
By default namespace is taken from
+                `TrainingClient` object.
+            job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind
+                is taken from `TrainingClient` object.
+            job: Job object can be set to get the conditions. Object must be one of
+                these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc.
+                If this parameter is omitted, it gets Job with the given name and kind.
+            timeout: Kubernetes API server timeout in seconds to execute the request.
 
         Returns:
             list[V1JobCondition]: List of Job conditions with last transition
                 time, last update time, message, reason, type, and
-                status. It returns empty list if Training Job does not have any
+                status. It returns empty list if Job does not have any
                 conditions yet.
 
         Raises:
-            ValueError: Job kind is invalid.
-            TimeoutError: Timeout to get Training Job.
-            RuntimeError: Failed to get Training Job.
+            ValueError: Invalid input parameters.
+            TimeoutError: Timeout to get Job.
+            RuntimeError: Failed to get Job.
         """
 
-        models = tuple([d["model"] for d in list(constants.JOB_KINDS.values())])
-        if job is not None and not isinstance(job, models):
-            raise ValueError(f"Job must be one of these types: {models}")
+        namespace = namespace or self.namespace
+        job_kind = job_kind or self.job_kind
+
+        if job_kind not in constants.JOB_PARAMETERS:
+            raise ValueError(
+                f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}"
+            )
+
+        if job is not None and not isinstance(job, constants.JOB_MODELS):
+            raise ValueError(f"Job must be one of these types: {constants.JOB_MODELS}")
 
         # If Job is not set, get the Training Job.
         if job is None:
-            if job_kind not in constants.JOB_KINDS:
+            # Job name must be set when Job object is not set.
+            if name is None:
                 raise ValueError(
-                    f"Job kind must be one of these: {list(constants.JOB_KINDS.keys())}"
+                    "Job name must be set when Job object is not set"
                 )
-            job = utils.get_job(
-                custom_api=self.custom_api,
-                api_client=self.api_client,
+
+            job = self.get_job(
                 name=name,
                 namespace=namespace,
-                job_model=constants.JOB_KINDS[job_kind]["model"],
                 job_kind=job_kind,
-                job_plural=constants.JOB_KINDS[job_kind]["plural"],
                 timeout=timeout,
             )
         if job.status and job.status.conditions and len(job.status.conditions) > 0:
@@ -128,30 +401,30 @@
 
     def is_job_created(
         self,
-        name: str,
-        namespace: str = utils.get_default_target_namespace(),
-        job_kind: str = constants.TFJOB_KIND,
-        job: object = None,
+        name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        job_kind: Optional[str] = None,
+        job: Optional[constants.JOB_MODELS_TYPE] = None,
         timeout: int = constants.DEFAULT_TIMEOUT,
-    ):
+    ) -> bool:
         """Check if Training Job is Created.
 
         Args:
             name: Name for the Job.
-            namespace: Namespace for the Job.
-            job_kind: Kind for the Training job to check the status.
-            It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`.
-            job: Optionally, Training Job object can be set to check the status.
-            It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob,
-            KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob`
-            timeout: Optional, Kubernetes API server timeout in seconds
-            to execute the request.
+            namespace: Namespace for the Job. By default namespace is taken from
+                `TrainingClient` object.
+            job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind
+                is taken from `TrainingClient` object.
+            job: Job object can be set to get the conditions.
Object must be one of + these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc. + If this parameter is omitted, it gets Job with the given name and kind. + timeout: Kubernetes API server timeout in seconds to execute the request. Returns: bool: True if Job is Created, else False. Raises: - ValueError: Job kind is invalid. + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. RuntimeError: Failed to get Job. """ @@ -163,30 +436,30 @@ def is_job_created( def is_job_running( self, - name: str, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.TFJOB_KIND, - job: object = None, + name: Optional[str] = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + job: Optional[constants.JOB_MODELS_TYPE] = None, timeout: int = constants.DEFAULT_TIMEOUT, - ): + ) -> bool: """Check if Training Job is Running. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to check the status. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. - job: Optionally, Training Job object can be set to check the status. - It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, - KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob` - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + job: Job object can be set to get the conditions. Object must be one of + these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc. + If this parameter is omitted, it gets Job with the given name and kind. + timeout: Kubernetes API server timeout in seconds to execute the request. Returns: bool: True if Job is Running, else False. Raises: - ValueError: Job kind is invalid. + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. RuntimeError: Failed to get Job. """ @@ -198,30 +471,30 @@ def is_job_running( def is_job_restarting( self, - name: str, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.TFJOB_KIND, - job: object = None, + name: Optional[str] = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + job: Optional[constants.JOB_MODELS_TYPE] = None, timeout: int = constants.DEFAULT_TIMEOUT, - ): + ) -> bool: """Check if Training Job is Restarting. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to check the status. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. - job: Optionally, Training Job object can be set to check the status. - It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, - KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob` - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + job: Job object can be set to get the conditions. Object must be one of + these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc. 
+ If this parameter is omitted, it gets Job with the given name and kind. + timeout: Kubernetes API server timeout in seconds to execute the request. Returns: bool: True if Job is Restarting, else False. Raises: - ValueError: Job kind is invalid. + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. RuntimeError: Failed to get Job. """ @@ -233,30 +506,30 @@ def is_job_restarting( def is_job_succeeded( self, - name: str, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.TFJOB_KIND, - job: object = None, + name: Optional[str] = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + job: Optional[constants.JOB_MODELS_TYPE] = None, timeout: int = constants.DEFAULT_TIMEOUT, - ): + ) -> bool: """Check if Training Job is Succeeded. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to check the status. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. - job: Optionally, Training Job object can be set to check the status. - It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, - KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob` - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + job: Job object can be set to get the conditions. Object must be one of + these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc. + If this parameter is omitted, it gets Job with the given name and kind. + timeout: Kubernetes API server timeout in seconds to execute the request. Returns: bool: True if Job is Succeeded, else False. Raises: - ValueError: Job kind is invalid. + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. RuntimeError: Failed to get Job. """ @@ -268,30 +541,30 @@ def is_job_succeeded( def is_job_failed( self, - name: str, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.TFJOB_KIND, - job: object = None, + name: Optional[str] = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + job: Optional[constants.JOB_MODELS_TYPE] = None, timeout: int = constants.DEFAULT_TIMEOUT, - ): + ) -> bool: """Check if Training Job is Failed. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to check the status. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. - job: Optionally, Training Job object can be set to check the status. - It should be type of `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, - KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or KubeflowOrgV1PaddleJob` - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. + job: Job object can be set to get the conditions. Object must be one of + these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, KubeflowOrgV1MXJob, etc. + If this parameter is omitted, it gets Job with the given name and kind. 
+ timeout: Kubernetes API server timeout in seconds to execute the request. Returns: bool: True if Job is Failed, else False. Raises: - ValueError: Job kind is invalid. + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. RuntimeError: Failed to get Job. """ @@ -304,69 +577,69 @@ def is_job_failed( def wait_for_job_conditions( self, name: str, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.TFJOB_KIND, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, expected_conditions: Set = {constants.JOB_CONDITION_SUCCEEDED}, - timeout: int = 600, + wait_timeout: int = 600, polling_interval: int = 15, - callback: Callable = None, - apiserver_timeout: int = constants.DEFAULT_TIMEOUT, - ): + callback: Optional[Callable] = None, + timeout: int = constants.DEFAULT_TIMEOUT, + ) -> constants.JOB_MODELS_TYPE: """Wait until Training Job reaches any of the specified conditions. By default it waits for the Succeeded condition. Args: name: Name for the Job. - namespace: Namespace for the Job. - job_kind: Kind for the Training job to wait for conditions. - It should be one of these: `TFJob, PyTorchJob, MXJob, XGBoostJob, MPIJob, or PaddleJob`. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. expected_conditions: Set of expected conditions. It must be subset of this: `{"Created", "Running", "Restarting", "Succeeded", "Failed"}` - timeout: How many seconds to wait until Job reaches one of + wait_timeout: How many seconds to wait until Job reaches one of the expected conditions. polling_interval: The polling interval in seconds to get Job status. - callback: Optional callback function that is invoked after Job + callback: Callback function that is invoked after Job status is polled. This function takes a single argument which is current Job object. - apiserver_timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. + timeout: Kubernetes API server timeout in seconds to execute the request. Returns: - object: Training Job object of type `KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, - KubeflowOrgV1MXJob, KubeflowOrgV1XGBoostJob, KubeflowOrgV1MPIJob, or - KubeflowOrgV1PaddleJob` which is reached required condition. + object: Job object. For example: KubeflowOrgV1PyTorchJob Raises: - ValueError: Expected conditions are invalid or Job kind is invalid + ValueError: Invalid input parameters. TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. + RuntimeError: Failed to get Job or Job reaches unexpected Failed condition. """ + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind + if not expected_conditions.issubset(constants.JOB_CONDITIONS): raise ValueError( - f"Expected conditions: {expected_conditions} must be subset of {constants.JOB_CONDITIONS}" + f"Expected conditions: {expected_conditions} must be subset of \ + {constants.JOB_CONDITIONS}" ) - for _ in range(round(timeout / polling_interval)): - + for _ in range(round(wait_timeout / polling_interval)): # We should get Job only once per cycle and check the statuses. 
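+            # Note: `wait_timeout` bounds the overall wait across polling cycles,
+            # while `timeout` applies to each individual Kubernetes API request.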
-            job = utils.get_job(
-                custom_api=self.custom_api,
-                api_client=self.api_client,
+            job = self.get_job(
                 name=name,
                 namespace=namespace,
-                job_model=constants.JOB_KINDS[job_kind]["model"],
                 job_kind=job_kind,
-                job_plural=constants.JOB_KINDS[job_kind]["plural"],
-                timeout=apiserver_timeout,
-            )
-            conditions = self.get_job_conditions(
-                name, namespace, job_kind, job, timeout
+                timeout=timeout,
             )
+
+            # Get Job conditions.
+            conditions = self.get_job_conditions(job=job, timeout=timeout)
             if len(conditions) > 0:
                 status_logger(
-                    name, conditions[-1].type, conditions[-1].last_transition_time,
+                    name,
+                    conditions[-1].type,
+                    conditions[-1].last_transition_time,
                 )
-            # Execute callback function.
+
+            # Execute callback function if it is set.
             if callback:
                 callback(job)
 
@@ -388,27 +661,29 @@
             time.sleep(polling_interval)
 
         raise TimeoutError(
-            f"Timeout waiting for {job_kind}: {namespace}/{name} to reach expected conditions: {expected_conditions}"
+            f"Timeout waiting for {job_kind}: {namespace}/{name} to reach expected conditions: \
+                {expected_conditions}"
         )
 
     def get_job_pod_names(
         self,
         name: str,
-        namespace: str = utils.get_default_target_namespace(),
+        namespace: Optional[str] = None,
         is_master: bool = False,
-        replica_type: str = None,
-        replica_index: int = None,
+        replica_type: Optional[str] = None,
+        replica_index: Optional[int] = None,
         timeout: int = constants.DEFAULT_TIMEOUT,
-    ):
+    ) -> List[str]:
         """Get pod names for the Training Job.
 
         Args:
             name: Name for the Job.
-            namespace: Namespace for the Job.
+            namespace: Namespace for the Job. By default namespace is taken from
+                `TrainingClient` object.
             is_master: Whether to get pods only with the label
                 `training.kubeflow.org/job-role: master`.
-            replica_type: Optional, type of the Job replica.
-                For TFJob one of `chief`, `ps`, or `worker`.
+            replica_type: Type of the Job replica.
+                For TFJob one of `Chief`, `PS`, or `Worker`.
 
                 For PyTorchJob one of `master` or `worker`.
 
@@ -420,9 +695,8 @@
 
                 For PaddleJob one of `master` or `worker`.
 
-            replica_index: Optional, index for the Job replica.
-            timeout: Optional, Kubernetes API server timeout in seconds
-            to execute the request.
+            replica_index: Index for the Job replica.
+            timeout: Kubernetes API server timeout in seconds to execute the request.
 
         Returns:
             list[str]: List of the Job pod names.
@@ -433,6 +707,8 @@
             RuntimeError: Failed to get Job pods.
         """
 
+        namespace = namespace or self.namespace
+
         if (
             replica_type is not None
             and replica_type not in constants.TFJOB_REPLICA_TYPES
@@ -471,7 +747,9 @@
         pods = []
         try:
             thread = self.core_api.list_namespaced_pod(
-                namespace, label_selector=label_selector, async_req=True,
+                namespace,
+                label_selector=label_selector,
+                async_req=True,
             )
             response = thread.get(timeout)
         except multiprocessing.TimeoutError:
@@ -486,11 +764,11 @@
     def get_job_logs(
         self,
         name: str,
-        namespace: str = utils.get_default_target_namespace(),
+        namespace: Optional[str] = None,
+        job_kind: Optional[str] = None,
         is_master: bool = True,
-        replica_type: str = None,
-        replica_index: int = None,
-        container: str = constants.TFJOB_CONTAINER,
+        replica_type: Optional[str] = None,
+        replica_index: Optional[int] = None,
         follow: bool = False,
         timeout: int = constants.DEFAULT_TIMEOUT,
     ):
@@ -499,7 +777,10 @@
 
         Args:
             name: Name for the Job.
-            namespace: Namespace for the Job.
By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. is_master: Whether to get logs for the pod with the label `training.kubeflow.org/job-role: master`. replica_type: Optional, type of the Job replica. @@ -526,6 +807,9 @@ def get_job_logs( RuntimeError: Failed to get Job pods. """ + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind + pods = self.get_job_pod_names( name=name, namespace=namespace, @@ -543,7 +827,7 @@ def get_job_logs( self.core_api.read_namespaced_pod_log, name=pod, namespace=namespace, - container=container, + container=constants.JOB_PARAMETERS[job_kind]["container"], ) ) finished = [False for _ in log_streams] @@ -572,1089 +856,98 @@ def get_job_logs( for pod in pods: try: pod_logs = self.core_api.read_namespaced_pod_log( - pod, namespace, container=container + pod, + namespace, + container=constants.JOB_PARAMETERS[job_kind]["container"], ) logging.info("The logs of pod %s:\n %s", pod, pod_logs) except Exception: - raise RuntimeError( - f"Failed to read logs for pod {namespace}/{pod}" - ) - - # ------------------------------------------------------------------------ # - # TFJob Training Client APIs. - # ------------------------------------------------------------------------ # - def create_tfjob( - self, - tfjob: models.KubeflowOrgV1TFJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the TFJob. - - Args: - tfjob: TFJob object of type KubeflowOrgV1TFJob. - namespace: Namespace for the TFJob. - - Raises: - TimeoutError: Timeout to create TFJob. - RuntimeError: Failed to create TFJob. - """ + raise RuntimeError(f"Failed to read logs for pod {namespace}/{pod}") - utils.create_job( - custom_api=self.custom_api, - job=tfjob, - namespace=namespace, - job_kind=constants.TFJOB_KIND, - job_plural=constants.TFJOB_PLURAL, - ) - - def create_tfjob_from_func( + def update_job( self, + job: constants.JOB_MODELS_TYPE, name: str, - func: Callable, - parameters: Dict[str, Any] = None, - base_image: str = constants.TFJOB_BASE_IMAGE, - namespace: str = utils.get_default_target_namespace(), - num_chief_replicas: int = None, - num_ps_replicas: int = None, - num_worker_replicas: int = None, - packages_to_install: List[str] = None, - pip_index_url: str = "https://pypi.org/simple", + namespace: Optional[str] = None, + job_kind: Optional[str] = None, ): - """Create TFJob from the function. + """Update the Training Job by using patch Kubernetes API. Args: - name: Name for the TFJob. - func: Function that TFJob uses to train the model. This function - must be Callable. Optionally, this function might have one dict - argument to define input parameters for the function. - parameters: Dict of input parameters that training function might receive. - base_image: Image to use when executing the training function. - namespace: Namespace for the TFJob. - num_chief_replicas: Number of Chief replicas for the TFJob. Number - of Chief replicas can't be more than 1. - num_ps_replicas: Number of Parameter Server replicas for the TFJob. - num_worker_replicas: Number of Worker replicas for the TFJob. - packages_to_install: List of Python packages to install in addition - to the base image packages. These packages are installed before - executing the objective function. - pip_index_url: The PyPI url from which to install Python packages. - + job: Job object. For example, object with type + KubeflowOrgV1TFJob or KubeflowOrgV1PyTorchJob. 
+ name: Name for the Job. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. Raises: - ValueError: TFJob replicas are missing or training function is invalid. - TimeoutError: Timeout to create TFJob. - RuntimeError: Failed to create TFJob. + TimeoutError: Timeout to update Job + RuntimeError: Failed to update Job """ - # Check if at least one replica is set. - # TODO (andreyvelich): Remove this check once we have CEL validation. - # Ref: https://github.com/kubeflow/training-operator/issues/1708 - if ( - num_chief_replicas is None - and num_ps_replicas is None - and num_worker_replicas is None - ): - raise ValueError("At least one replica for TFJob must be set") + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind - # Check if function is callable. - if not callable(func): + if job_kind not in constants.JOB_PARAMETERS: raise ValueError( - f"Training function must be callable, got function type: {type(func)}" - ) - - # Get TFJob Pod template spec. - pod_template_spec = utils.get_pod_template_spec( - func=func, - parameters=parameters, - base_image=base_image, - container_name=constants.TFJOB_CONTAINER, - packages_to_install=packages_to_install, - pip_index_url=pip_index_url, - ) - - # Create TFJob template. - tfjob = models.KubeflowOrgV1TFJob( - api_version=f"{constants.KUBEFLOW_GROUP}/{constants.OPERATOR_VERSION}", - kind=constants.TFJOB_KIND, - metadata=client.V1ObjectMeta(name=name, namespace=namespace), - spec=models.KubeflowOrgV1TFJobSpec( - run_policy=models.V1RunPolicy(clean_pod_policy=None), - tf_replica_specs={}, - ), - ) - - # Add Chief, PS, and Worker replicas to the TFJob. - if num_chief_replicas is not None: - tfjob.spec.tf_replica_specs[ - constants.REPLICA_TYPE_CHIEF - ] = models.V1ReplicaSpec( - replicas=num_chief_replicas, template=pod_template_spec, + f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" ) - if num_ps_replicas is not None: - tfjob.spec.tf_replica_specs[ - constants.REPLICA_TYPE_PS - ] = models.V1ReplicaSpec( - replicas=num_ps_replicas, template=pod_template_spec, - ) - - if num_worker_replicas is not None: - tfjob.spec.tf_replica_specs[ - constants.REPLICA_TYPE_WORKER - ] = models.V1ReplicaSpec( - replicas=num_worker_replicas, template=pod_template_spec, + try: + self.custom_api.patch_namespaced_custom_object( + constants.GROUP, + constants.VERSION, + namespace, + constants.JOB_PARAMETERS[job_kind]["plural"], + name, + job, ) + except multiprocessing.TimeoutError: + raise TimeoutError(f"Timeout to update {job_kind}: {namespace}/{name}") + except Exception: + raise RuntimeError(f"Failed to update {job_kind}: {namespace}/{name}") - # Create TFJob. - self.create_tfjob(tfjob=tfjob, namespace=namespace) - - def get_tfjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the TFJob. - - Args: - name: Name for the TFJob. - namespace: Namespace for the TFJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1TFJob: TFJob object. - - Raises: - TimeoutError: Timeout to get TFJob. - RuntimeError: Failed to get TFJob. 
- """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1TFJob, - job_kind=constants.TFJOB_KIND, - job_plural=constants.TFJOB_PLURAL, - timeout=timeout, - ) - - def list_tfjobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all TFJobs in namespace. - - Args: - namespace: Namespace to list the TFJobs. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1TFJob]: List of TFJobs objects. It returns - empty list if TFJobs cannot be found. - - Raises: - TimeoutError: Timeout to list TFJobs. - RuntimeError: Failed to list TFJobs. - """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1TFJob, - job_kind=constants.TFJOB_KIND, - job_plural=constants.TFJOB_PLURAL, - timeout=timeout, - ) + logging.info(f"{job_kind} {namespace}/{name} has been updated") - def delete_tfjob( + def delete_job( self, name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, + namespace: Optional[str] = None, + job_kind: Optional[str] = None, + delete_options: Optional[client.V1DeleteOptions] = None, ): - """Delete the TFJob + """Delete the Training Job Args: - name: Name for the TFJob. - namespace: Namespace for the TFJob. + name: Name for the Job. + namespace: Namespace for the Job. By default namespace is taken from + `TrainingClient` object. + job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind + is taken from `TrainingClient` object. delete_options: Optional, V1DeleteOptions to set while deleting - the TFJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete TFJob. - RuntimeError: Failed to delete TFJob. - """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.TFJOB_KIND, - job_plural=constants.TFJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_tfjob( - self, - tfjob: models.KubeflowOrgV1TFJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the TFJob. - - Args: - tfjob: TFJob object of type KubeflowOrgV1TFJob to patch. - name: Name for the TFJob. - namespace: Namespace for the TFJob. + the Job. For example, grace period seconds. Raises: - TimeoutError: Timeout to patch TFJob. - RuntimeError: Failed to patch TFJob. + TimeoutError: Timeout to delete Job. + RuntimeError: Failed to delete Job. """ - return utils.patch_job( - custom_api=self.custom_api, - job=tfjob, - name=name, - namespace=namespace, - job_kind=constants.TFJOB_KIND, - job_plural=constants.TFJOB_PLURAL, - ) - - # ------------------------------------------------------------------------ # - # PyTorchJob Training Client APIs. - # ------------------------------------------------------------------------ # - def create_pytorchjob( - self, - pytorchjob: models.KubeflowOrgV1PyTorchJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the PyTorchJob. + namespace = namespace or self.namespace + job_kind = job_kind or self.job_kind - Args: - pytorchjob: PyTorchJob object of type KubeflowOrgV1PyTorchJob. - namespace: Namespace for the PyTorchJob. 
+ try: + self.custom_api.delete_namespaced_custom_object( + constants.GROUP, + constants.VERSION, + namespace, + constants.JOB_PARAMETERS[job_kind]["plural"], + name=name, + body=delete_options, + ) + except multiprocessing.TimeoutError: + raise TimeoutError(f"Timeout to delete {job_kind}: {namespace}/{name}") + except Exception: + raise RuntimeError(f"Failed to delete {job_kind}: {namespace}/{name}") - Raises: - TimeoutError: Timeout to create PyTorchJob. - RuntimeError: Failed to create PyTorchJob. - """ - - utils.create_job( - custom_api=self.custom_api, - job=pytorchjob, - namespace=namespace, - job_kind=constants.PYTORCHJOB_KIND, - job_plural=constants.PYTORCHJOB_PLURAL, - ) - - def create_pytorchjob_from_func( - self, - name: str, - func: Callable, - parameters: Dict[str, Any] = None, - base_image: str = constants.PYTORCHJOB_BASE_IMAGE, - namespace: str = utils.get_default_target_namespace(), - num_worker_replicas: int = None, - packages_to_install: List[str] = None, - pip_index_url: str = "https://pypi.org/simple", - ): - """Create PyTorchJob from the function. - - Args: - name: Name for the PyTorchJob. - func: Function that PyTorchJob uses to train the model. This function - must be Callable. Optionally, this function might have one dict - argument to define input parameters for the function. - parameters: Dict of input parameters that training function might receive. - base_image: Image to use when executing the training function. - namespace: Namespace for the PyTorchJob. - num_worker_replicas: Number of Worker replicas for the PyTorchJob. - If number of Worker replicas is 1, PyTorchJob uses only - Master replica. - packages_to_install: List of Python packages to install in addition - to the base image packages. These packages are installed before - executing the objective function. - pip_index_url: The PyPI url from which to install Python packages. - """ - - # Check if at least one worker replica is set. - # TODO (andreyvelich): Remove this check once we have CEL validation. - # Ref: https://github.com/kubeflow/training-operator/issues/1708 - if num_worker_replicas is None: - raise ValueError("At least one Worker replica for PyTorchJob must be set") - - # Check if function is callable. - if not callable(func): - raise ValueError( - f"Training function must be callable, got function type: {type(func)}" - ) - - # Get PyTorchJob Pod template spec. - pod_template_spec = utils.get_pod_template_spec( - func=func, - parameters=parameters, - base_image=base_image, - container_name=constants.PYTORCHJOB_CONTAINER, - packages_to_install=packages_to_install, - pip_index_url=pip_index_url, - ) - - # Create PyTorchJob template. - pytorchjob = models.KubeflowOrgV1PyTorchJob( - api_version=f"{constants.KUBEFLOW_GROUP}/{constants.OPERATOR_VERSION}", - kind=constants.PYTORCHJOB_KIND, - metadata=client.V1ObjectMeta(name=name, namespace=namespace), - spec=models.KubeflowOrgV1PyTorchJobSpec( - run_policy=models.V1RunPolicy(clean_pod_policy=None), - pytorch_replica_specs={}, - ), - ) - - # Add Master and Worker replicas to the PyTorchJob. - pytorchjob.spec.pytorch_replica_specs[ - constants.REPLICA_TYPE_MASTER - ] = models.V1ReplicaSpec(replicas=1, template=pod_template_spec,) - - # If number of Worker replicas is 1, PyTorchJob uses only Master replica. 
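Since delete_options is forwarded as the request body of the delete call above, graceful deletion works as it did with the removed per-kind methods; a brief sketch (the 30-second grace period is arbitrary):

    from kubernetes import client as k8s_client

    training_client.delete_job(
        "train-job",
        delete_options=k8s_client.V1DeleteOptions(grace_period_seconds=30),
    )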
- if num_worker_replicas != 1: - pytorchjob.spec.pytorch_replica_specs[ - constants.REPLICA_TYPE_WORKER - ] = models.V1ReplicaSpec( - replicas=num_worker_replicas, template=pod_template_spec, - ) - - # Create PyTorchJob - self.create_pytorchjob(pytorchjob=pytorchjob, namespace=namespace) - - def get_pytorchjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the PyTorchJob. - - Args: - name: Name for the PyTorchJob. - namespace: Namespace for the PyTorchJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1PyTorchJob: PyTorchJob object. - - Raises: - TimeoutError: Timeout to get PyTorchJob. - RuntimeError: Failed to get PyTorchJob. - """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1PyTorchJob, - job_kind=constants.PYTORCHJOB_KIND, - job_plural=constants.PYTORCHJOB_PLURAL, - timeout=timeout, - ) - - def list_pytorchjobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all PyTorchJob in namespace. - - Args: - namespace: Namespace to list the PyTorchJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1PyTorchJob]: List of PyTorchJob objects. It returns - empty list if PyTorchJobs cannot be found. - - Raises: - TimeoutError: Timeout to list PyTorchJobs. - RuntimeError: Failed to list PyTorchJobs. - """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1PyTorchJob, - job_kind=constants.PYTORCHJOB_KIND, - job_plural=constants.PYTORCHJOB_PLURAL, - timeout=timeout, - ) - - def delete_pytorchjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, - ): - """Delete the PyTorchJob - - Args: - name: Name for the PyTorchJob. - namespace: Namespace for the PyTorchJob. - delete_options: Optional, V1DeleteOptions to set while deleting - the PyTorchJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete PyTorchJob. - RuntimeError: Failed to delete PyTorchJob. - """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.PYTORCHJOB_KIND, - job_plural=constants.PYTORCHJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_pytorchjob( - self, - pytorchjob: models.KubeflowOrgV1PyTorchJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the PyTorchJob. - - Args: - pytorchjob: PyTorchJob object of type KubeflowOrgV1PyTorchJob. - name: Name for the PyTorchJob. - namespace: Namespace for the PyTorchJob. - - Raises: - TimeoutError: Timeout to patch PyTorchJob. - RuntimeError: Failed to patch PyTorchJob. - """ - - return utils.patch_job( - custom_api=self.custom_api, - job=pytorchjob, - name=name, - namespace=namespace, - job_kind=constants.PYTORCHJOB_KIND, - job_plural=constants.PYTORCHJOB_PLURAL, - ) - - # ------------------------------------------------------------------------ # - # MXJob Training Client APIs. 
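The deletions that follow repeat this same pattern for each remaining kind, so a one-screen mapping may help reviewers (the get_job and list_jobs signatures are inferred from the e2e test updates later in this patch):

    # Removed per-kind method            ->  consolidated replacement
    # create_pytorchjob(job, ns)         ->  create_job(job=job, namespace=ns)
    # get_pytorchjob(name, ns)           ->  get_job(name, namespace=ns)
    # list_pytorchjobs(ns)               ->  list_jobs(namespace=ns)
    # patch_pytorchjob(job, name, ns)    ->  update_job(job, name, namespace=ns)
    # delete_pytorchjob(name, ns)        ->  delete_job(name, namespace=ns)
    # job_kind defaults to the client's kind and can be overridden per call.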
- # ------------------------------------------------------------------------ # - def create_mxjob( - self, - mxjob: models.KubeflowOrgV1MXJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the MXJob. - - Args: - mxjob: MXJob object of type KubeflowOrgV1MXJob. - namespace: Namespace for the MXJob. - - Raises: - TimeoutError: Timeout to create MXJob. - RuntimeError: Failed to create MXJob. - """ - - utils.create_job( - custom_api=self.custom_api, - job=mxjob, - namespace=namespace, - job_kind=constants.MXJOB_KIND, - job_plural=constants.MXJOB_PLURAL, - ) - - def create_mxjob_from_func(self): - """Create MXJob from the function. - TODO (andreyvelich): Implement this function. - """ - logging.warning("This API has not been implemented yet.") - - def get_mxjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the MXJob. - - Args: - name: Name for the MXJob. - namespace: Namespace for the MXJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1MXJob: MXJob object. - - Raises: - TimeoutError: Timeout to get MXJob. - RuntimeError: Failed to get MXJob. - """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1MXJob, - job_kind=constants.MXJOB_KIND, - job_plural=constants.MXJOB_PLURAL, - timeout=timeout, - ) - - def list_mxjobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all MXJobs in namespace. - - Args: - namespace: Namespace to list the MXJobs. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1MXJob]: List of MXJobs objects. It returns - empty list if MXJobs cannot be found. - - Raises: - TimeoutError: Timeout to list MXJobs. - RuntimeError: Failed to list MXJobs. - """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1MXJob, - job_kind=constants.MXJOB_KIND, - job_plural=constants.MXJOB_PLURAL, - timeout=timeout, - ) - - def delete_mxjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, - ): - """Delete the MXJob - - Args: - name: Name for the MXJob. - namespace: Namespace for the MXJob. - delete_options: Optional, V1DeleteOptions to set while deleting - the MXJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete MXJob. - RuntimeError: Failed to delete MXJob. - """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.MXJOB_KIND, - job_plural=constants.MXJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_mxjob( - self, - mxjob: models.KubeflowOrgV1MXJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the MXJob. - - Args: - mxjob: MXJob object of type KubeflowOrgV1MXJob. - name: Name for the MXJob. - namespace: Namespace for the MXJob. - - Raises: - TimeoutError: Timeout to patch MXJob. - RuntimeError: Failed to patch MXJob. 
- """ - - return utils.patch_job( - custom_api=self.custom_api, - job=mxjob, - name=name, - namespace=namespace, - job_kind=constants.MXJOB_KIND, - job_plural=constants.MXJOB_PLURAL, - ) - - # ------------------------------------------------------------------------ # - # XGBoostJob Training Client APIs. - # ------------------------------------------------------------------------ # - def create_xgboostjob( - self, - xgboostjob: models.KubeflowOrgV1XGBoostJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the XGBoostJob. - - Args: - xgboostjob: XGBoostJob object of type KubeflowOrgV1XGBoostJob. - namespace: Namespace for the XGBoostJob. - - Raises: - TimeoutError: Timeout to create XGBoostJob. - RuntimeError: Failed to create XGBoostJob. - """ - - utils.create_job( - custom_api=self.custom_api, - job=xgboostjob, - namespace=namespace, - job_kind=constants.XGBOOSTJOB_KIND, - job_plural=constants.XGBOOSTJOB_PLURAL, - ) - - def create_xgboostjob_from_func(self): - """Create XGBoost from the function. - TODO (andreyvelich): Implement this function. - """ - logging.warning("This API has not been implemented yet.") - - def get_xgboostjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the XGBoostJob. - - Args: - name: Name for the XGBoostJob. - namespace: Namespace for the XGBoostJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1XGBoostJob: XGBoostJob object. - - Raises: - TimeoutError: Timeout to get XGBoostJob. - RuntimeError: Failed to get XGBoostJob. - """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1XGBoostJob, - job_kind=constants.XGBOOSTJOB_KIND, - job_plural=constants.XGBOOSTJOB_PLURAL, - timeout=timeout, - ) - - def list_xgboostjobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all XGBoostJobs in namespace. - - Args: - namespace: Namespace to list the XGBoostJobs. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1XGBoostJob]: List of XGBoostJobs objects. It returns - empty list if XGBoostJobs cannot be found. - - Raises: - TimeoutError: Timeout to list XGBoostJobs. - RuntimeError: Failed to list XGBoostJobs. - """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1XGBoostJob, - job_kind=constants.XGBOOSTJOB_KIND, - job_plural=constants.XGBOOSTJOB_PLURAL, - timeout=timeout, - ) - - def delete_xgboostjob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, - ): - """Delete the XGBoostJob - - Args: - name: Name for the XGBoostJob. - namespace: Namespace for the XGBoostJob. - delete_options: Optional, V1DeleteOptions to set while deleting - the XGBoostJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete XGBoostJob. - RuntimeError: Failed to delete XGBoostJob. 
- """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.XGBOOSTJOB_KIND, - job_plural=constants.XGBOOSTJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_xgboostjob( - self, - xgboostjob: models.KubeflowOrgV1XGBoostJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the XGBoostJob. - - Args: - xgboostjob: XGBoostJob object of type KubeflowOrgV1XGBoostJob. - name: Name for the XGBoostJob. - namespace: Namespace for the XGBoostJob. - - Raises: - TimeoutError: Timeout to patch XGBoostJob. - RuntimeError: Failed to patch XGBoostJob. - """ - - return utils.patch_job( - custom_api=self.custom_api, - job=xgboostjob, - name=name, - namespace=namespace, - job_kind=constants.XGBOOSTJOB_KIND, - job_plural=constants.XGBOOSTJOB_PLURAL, - ) - - # ------------------------------------------------------------------------ # - # MPIJob Training Client APIs. - # ------------------------------------------------------------------------ # - def create_mpijob( - self, - mpijob: models.KubeflowOrgV1MPIJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the MPIJob. - - Args: - mpijob: MPIJob object of type KubeflowOrgV1MPIJob. - namespace: Namespace for the MPIJob. - - Raises: - TimeoutError: Timeout to create MPIJob. - RuntimeError: Failed to create MPIJob. - """ - - utils.create_job( - custom_api=self.custom_api, - job=mpijob, - namespace=namespace, - job_kind=constants.MPIJOB_KIND, - job_plural=constants.MPIJOB_PLURAL, - ) - - def create_mpijob_from_func(self): - """Create MPIJob from the function. - TODO (andreyvelich): Implement this function. - """ - logging.warning("This API has not been implemented yet.") - - def get_mpijob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the MPIJob. - - Args: - name: Name for the MPIJob. - namespace: Namespace for the MPIJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1MPIJob: MPIJob object. - - Raises: - TimeoutError: Timeout to get MPIJob. - RuntimeError: Failed to get MPIJob. - """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1MPIJob, - job_kind=constants.MPIJOB_KIND, - job_plural=constants.MPIJOB_PLURAL, - timeout=timeout, - ) - - def list_mpijobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all MPIJobs in namespace. - - Args: - namespace: Namespace to list the MPIJobs. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1MPIJob]: List of MPIJobs objects. It returns - empty list if MPIJobs cannot be found. - - Raises: - TimeoutError: Timeout to list MPIJobs. - RuntimeError: Failed to list MPIJobs. - """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1MPIJob, - job_kind=constants.MPIJOB_KIND, - job_plural=constants.MPIJOB_PLURAL, - timeout=timeout, - ) - - def delete_mpijob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, - ): - """Delete the MPIJob - - Args: - name: Name for the MPIJob. - namespace: Namespace for the MPIJob. 
- delete_options: Optional, V1DeleteOptions to set while deleting - the MPIJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete MPIJob. - RuntimeError: Failed to delete MPIJob. - """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.MPIJOB_KIND, - job_plural=constants.MPIJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_mpijob( - self, - mpijob: models.KubeflowOrgV1MPIJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the MPIJob. - - Args: - mpijob: MPIJob object of type KubeflowOrgV1MPIJob. - name: Name for the MPIJob. - namespace: Namespace for the MPIJob. - - Raises: - TimeoutError: Timeout to patch MPIJob. - RuntimeError: Failed to patch MPIJob. - """ - - return utils.patch_job( - custom_api=self.custom_api, - job=mpijob, - name=name, - namespace=namespace, - job_kind=constants.MPIJOB_KIND, - job_plural=constants.MPIJOB_PLURAL, - ) - - # ------------------------------------------------------------------------ # - # PaddleJob Training Client APIs. - # ------------------------------------------------------------------------ # - def create_paddlejob( - self, - paddlejob: models.KubeflowOrgV1PaddleJob, - namespace=utils.get_default_target_namespace(), - ): - """Create the PaddleJob. - - Args: - paddlejob: PaddleJob object of type KubeflowOrgV1PaddleJob. - namespace: Namespace for the PaddleJob. - - Raises: - TimeoutError: Timeout to create PaddleJob. - RuntimeError: Failed to create PaddleJob. - """ - - utils.create_job( - custom_api=self.custom_api, - job=paddlejob, - namespace=namespace, - job_kind=constants.PADDLEJOB_KIND, - job_plural=constants.PADDLEJOB_PLURAL, - ) - - def create_paddlejob_from_func(self): - """Create PaddleJob from the function. - TODO (andreyvelich): Implement this function. - """ - logging.warning("This API has not been implemented yet.") - - def get_paddlejob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """Get the PaddleJob. - - Args: - name: Name for the PaddleJob. - namespace: Namespace for the PaddleJob. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - KubeflowOrgV1PaddleJob: PaddleJob object. - - Raises: - TimeoutError: Timeout to get PaddleJob. - RuntimeError: Failed to get PaddleJob. - """ - - return utils.get_job( - custom_api=self.custom_api, - api_client=self.api_client, - name=name, - namespace=namespace, - job_model=models.KubeflowOrgV1PaddleJob, - job_kind=constants.PADDLEJOB_KIND, - job_plural=constants.PADDLEJOB_PLURAL, - timeout=timeout, - ) - - def list_paddlejobs( - self, - namespace: str = utils.get_default_target_namespace(), - timeout: int = constants.DEFAULT_TIMEOUT, - ): - """List of all PaddleJobs in namespace. - - Args: - namespace: Namespace to list the PaddleJobs. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - - Returns: - list[KubeflowOrgV1PaddleJob]: List of PaddleJobs objects. It returns - empty list if PaddleJobs cannot be found. - - Raises: - TimeoutError: Timeout to list PaddleJobs. - RuntimeError: Failed to list PaddleJobs. 
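Listing works the same way under the consolidated API; a short sketch that assumes list_jobs accepts the same optional job_kind override as the other methods:

    for job in training_client.list_jobs(
        namespace="default", job_kind=constants.PADDLEJOB_KIND
    ):
        print(job.metadata.name)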
- """ - - return utils.list_jobs( - custom_api=self.custom_api, - api_client=self.api_client, - namespace=namespace, - job_model=models.KubeflowOrgV1PaddleJob, - job_kind=constants.PADDLEJOB_KIND, - job_plural=constants.PADDLEJOB_PLURAL, - timeout=timeout, - ) - - def delete_paddlejob( - self, - name: str, - namespace: str = utils.get_default_target_namespace(), - delete_options: client.V1DeleteOptions = None, - ): - """Delete the PaddleJob - - Args: - name: Name for the PaddleJob. - namespace: Namespace for the PaddleJob. - delete_options: Optional, V1DeleteOptions to set while deleting - the PaddleJob. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete PaddleJob. - RuntimeError: Failed to delete PaddleJob. - """ - - utils.delete_job( - custom_api=self.custom_api, - name=name, - namespace=namespace, - job_kind=constants.PADDLEJOB_KIND, - job_plural=constants.PADDLEJOB_PLURAL, - delete_options=delete_options, - ) - - def patch_paddlejob( - self, - paddlejob: models.KubeflowOrgV1PaddleJob, - name: str, - namespace: str = utils.get_default_target_namespace(), - ): - """Patch the PaddleJob. - - Args: - paddlejob: PaddleJob object of type KubeflowOrgV1PaddleJob. - name: Name for the PaddleJob. - namespace: Namespace for the PaddleJob. - - Raises: - TimeoutError: Timeout to patch PaddleJob. - RuntimeError: Failed to patch PaddleJob. - """ - - return utils.patch_job( - custom_api=self.custom_api, - job=paddlejob, - name=name, - namespace=namespace, - job_kind=constants.PADDLEJOB_KIND, - job_plural=constants.PADDLEJOB_PLURAL, - ) + logging.info(f"{job_kind} {namespace}/{name} has been deleted") diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 2105fb684a..09547dfdd3 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -13,15 +13,22 @@ # limitations under the License. from kubeflow.training import models +from typing import Union # How long to wait in seconds for requests to the Kubernetes API Server. DEFAULT_TIMEOUT = 120 -# Common constants. -KUBEFLOW_GROUP = "kubeflow.org" -OPERATOR_VERSION = "v1" +# The default PIP index URL to download Python packages. +DEFAULT_PIP_INDEX_URL = "https://pypi.org/simple" + +# Annotation to disable Istio sidecar. ISTIO_SIDECAR_INJECTION = "sidecar.istio.io/inject" +# Common constants. +GROUP = "kubeflow.org" +VERSION = "v1" +API_VERSION = f"{GROUP}/{VERSION}" + # Training Job conditions. JOB_CONDITION_CREATED = "Created" JOB_CONDITION_RUNNING = "Running" @@ -50,12 +57,19 @@ REPLICA_TYPE_PS = "PS" REPLICA_TYPE_MASTER = "Master" REPLICA_TYPE_WORKER = "Worker" +REPLICA_TYPE_SCHEDULER = "Scheduler" +REPLICA_TYPE_SERVER = "Server" +REPLICA_TYPE_LAUNCHER = "Launcher" # TFJob constants. 
TFJOB_KIND = "TFJob" TFJOB_PLURAL = "tfjobs" TFJOB_CONTAINER = "tensorflow" -TFJOB_REPLICA_TYPES = {"ps", "chief", "worker"} +TFJOB_REPLICA_TYPES = ( + REPLICA_TYPE_PS.lower(), + REPLICA_TYPE_CHIEF.lower(), + REPLICA_TYPE_WORKER.lower(), +) TFJOB_BASE_IMAGE = "docker.io/tensorflow/tensorflow:2.9.1" TFJOB_BASE_IMAGE_GPU = "docker.io/tensorflow/tensorflow:2.9.1-gpu" @@ -64,49 +78,89 @@ PYTORCHJOB_KIND = "PyTorchJob" PYTORCHJOB_PLURAL = "pytorchjobs" PYTORCHJOB_CONTAINER = "pytorch" -PYTORCHJOB_REPLICA_TYPES = {"master", "worker"} +PYTORCHJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), REPLICA_TYPE_WORKER.lower()) PYTORCHJOB_BASE_IMAGE = "docker.io/pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime" # MXJob constants MXJOB_KIND = "MXJob" MXJOB_PLURAL = "mxjobs" -MXJOB_REPLICA_TYPES = {"scheduler", "server", "worker"} +MXJOB_CONTAINER = "mxnet" +MXJOB_REPLICA_TYPES = ( + REPLICA_TYPE_SCHEDULER.lower(), + REPLICA_TYPE_SERVER.lower(), + REPLICA_TYPE_WORKER.lower(), +) # XGBoostJob constants XGBOOSTJOB_KIND = "XGBoostJob" XGBOOSTJOB_PLURAL = "xgboostjobs" -XGBOOSTJOB_REPLICA_TYPES = {"master", "worker"} +XGBOOSTJOB_CONTAINER = "xgboost" +XGBOOSTJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), REPLICA_TYPE_WORKER.lower()) # MPIJob constants MPIJOB_KIND = "MPIJob" MPIJOB_PLURAL = "mpijobs" -MPIJOB_REPLICA_TYPES = {"launcher", "worker"} +MPIJOB_CONTAINER = "mpi" +MPIJOB_REPLICA_TYPES = (REPLICA_TYPE_LAUNCHER.lower(), REPLICA_TYPE_WORKER.lower()) # PaddleJob constants PADDLEJOB_KIND = "PaddleJob" PADDLEJOB_PLURAL = "paddlejobs" -PADDLEJOB_REPLICA_TYPES = {"master", "worker"} +PADDLEJOB_CONTAINER = "paddle" +PADDLEJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), REPLICA_TYPE_WORKER.lower()) PADDLEJOB_BASE_IMAGE = ( "docker.io/paddlepaddle/paddle:2.4.0rc0-gpu-cuda11.2-cudnn8.1-trt8.0" ) -# Dictionary to get plural and model for each Job kind. -JOB_KINDS = { - TFJOB_KIND: {"plural": TFJOB_PLURAL, "model": models.KubeflowOrgV1TFJob}, + +# Dictionary to get plural, model, and container for each Job kind. +JOB_PARAMETERS = { + TFJOB_KIND: { + "model": models.KubeflowOrgV1TFJob, + "plural": TFJOB_PLURAL, + "container": TFJOB_CONTAINER, + "base_image": TFJOB_BASE_IMAGE, + }, PYTORCHJOB_KIND: { - "plural": PYTORCHJOB_PLURAL, "model": models.KubeflowOrgV1PyTorchJob, + "plural": PYTORCHJOB_PLURAL, + "container": PYTORCHJOB_CONTAINER, + "base_image": PYTORCHJOB_BASE_IMAGE, + }, + MXJOB_KIND: { + "model": models.KubeflowOrgV1MXJob, + "plural": MXJOB_PLURAL, + "container": MXJOB_CONTAINER, }, - MXJOB_KIND: {"plural": MXJOB_PLURAL, "model": models.KubeflowOrgV1MXJob}, XGBOOSTJOB_KIND: { - "plural": XGBOOSTJOB_PLURAL, "model": models.KubeflowOrgV1XGBoostJob, + "plural": XGBOOSTJOB_PLURAL, + "container": XGBOOSTJOB_CONTAINER, + }, + MPIJOB_KIND: { + "model": models.KubeflowOrgV1MPIJob, + "plural": MPIJOB_PLURAL, + "container": MPIJOB_CONTAINER, }, - MPIJOB_KIND: {"plural": MPIJOB_PLURAL, "model": models.KubeflowOrgV1MPIJob}, PADDLEJOB_KIND: { - "plural": PADDLEJOB_PLURAL, "model": models.KubeflowOrgV1PaddleJob, + "plural": PADDLEJOB_PLURAL, + "container": PADDLEJOB_CONTAINER, + "base_image": PADDLEJOB_BASE_IMAGE, }, } + +# Tuple of all Job models. +JOB_MODELS = tuple([d["model"] for d in list(JOB_PARAMETERS.values())]) + +# Union type of all Job models. 
+JOB_MODELS_TYPE = Union[ + models.KubeflowOrgV1TFJob, + models.KubeflowOrgV1PyTorchJob, + models.KubeflowOrgV1MXJob, + models.KubeflowOrgV1XGBoostJob, + models.KubeflowOrgV1MPIJob, + models.KubeflowOrgV1PaddleJob, +] diff --git a/sdk/python/kubeflow/training/utils/utils.py b/sdk/python/kubeflow/training/utils/utils.py index 84ceb172f3..1659b14cd3 100644 --- a/sdk/python/kubeflow/training/utils/utils.py +++ b/sdk/python/kubeflow/training/utils/utils.py @@ -16,16 +16,13 @@ import logging import textwrap import inspect -from typing import Callable, List, Dict, Any +from typing import Optional, Callable, List, Dict, Any import json import threading import queue -import multiprocessing - -from kubernetes import client from kubeflow.training.constants import constants -from kubeflow.training.api_client import ApiClient +from kubeflow.training import models logging.basicConfig(format="%(message)s") @@ -68,156 +65,6 @@ def get_default_target_namespace(): return f.readline() -def create_job( - custom_api: client.CustomObjectsApi, - job: object, - namespace: str, - job_kind: str, - job_plural: str, -): - """Create the Training Job.""" - - try: - custom_api.create_namespaced_custom_object( - constants.KUBEFLOW_GROUP, - constants.OPERATOR_VERSION, - namespace, - job_plural, - job, - ) - except multiprocessing.TimeoutError: - raise TimeoutError( - f"Timeout to create {job_kind}: {namespace}/{job.metadata.name}" - ) - except Exception: - raise RuntimeError( - f"Failed to create {job_kind}: {namespace}/{job.metadata.name}" - ) - - logging.info(f"{job_kind} {namespace}/{job.metadata.name} has been created") - - -def get_job( - custom_api: client.CustomObjectsApi, - api_client: ApiClient, - name: str, - namespace: str, - job_model: object, - job_kind: str, - job_plural: str, - timeout: int, -): - """Get the Training Job.""" - - try: - thread = custom_api.get_namespaced_custom_object( - constants.KUBEFLOW_GROUP, - constants.OPERATOR_VERSION, - namespace, - job_plural, - name, - async_req=True, - ) - response = FakeResponse(thread.get(timeout)) - job = api_client.deserialize(response, job_model) - return job - - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to get {job_kind}: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to get {job_kind}: {namespace}/{name}") - - -def list_jobs( - custom_api: client.CustomObjectsApi, - api_client: ApiClient, - namespace: str, - job_model: object, - job_kind: str, - job_plural: str, - timeout: int, -): - """List the Training Jobs.""" - - result = [] - try: - thread = custom_api.list_namespaced_custom_object( - constants.KUBEFLOW_GROUP, - constants.OPERATOR_VERSION, - namespace, - job_plural, - async_req=True, - ) - response = thread.get(timeout) - result = [ - api_client.deserialize(FakeResponse(item), job_model) - for item in response.get("items") - ] - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to list {job_kind}s in namespace: {namespace}") - except Exception: - raise RuntimeError(f"Failed to list {job_kind}s in namespace: {namespace}") - return result - - -def delete_job( - custom_api: client.CustomObjectsApi, - name: str, - namespace: str, - job_kind: str, - job_plural: str, - delete_options: client.V1DeleteOptions, -): - """Delete the Training Job.""" - - try: - custom_api.delete_namespaced_custom_object( - constants.KUBEFLOW_GROUP, - constants.OPERATOR_VERSION, - namespace, - job_plural, - name=name, - body=delete_options, - ) - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout 
to delete {job_kind}: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to delete {job_kind}: {namespace}/{name}") - - logging.info(f"{job_kind} {namespace}/{name} has been deleted") - - -def patch_job( - custom_api: client.CustomObjectsApi, - job: object, - name: str, - namespace: str, - job_kind: str, - job_plural: str, -): - """Patch the Training Job.""" - - try: - custom_api.patch_namespaced_custom_object( - constants.KUBEFLOW_GROUP, - constants.OPERATOR_VERSION, - namespace, - job_plural, - name, - job, - ) - except multiprocessing.TimeoutError: - raise TimeoutError( - f"Timeout to patch {job_kind}: {namespace}/{job.metadata.name}" - ) - except Exception: - raise RuntimeError( - f"Failed to patch {job_kind}: {namespace}/{job.metadata.name}" - ) - - logging.info(f"{job_kind} {namespace}/{job.metadata.name} has been patched") - - def wrap_log_stream(q, stream): while True: try: @@ -237,8 +84,9 @@ def get_log_queue_pool(streams): return pool -def has_condition(conditions: object, condition_type: str): - """Verify if the condition list has the required condition. +def has_condition(conditions: List[models.V1JobCondition], condition_type: str) -> bool: + """ + Verify if the condition list has the required condition. Condition should be valid object with `type` and `status`. """ @@ -248,7 +96,12 @@ def has_condition(conditions: object, condition_type: str): return False -def get_script_for_python_packages(packages_to_install, pip_index_url): +def get_script_for_python_packages( + packages_to_install: List[str], pip_index_url: str +) -> str: + """ + Get init script to install Python packages from the given pip index URL. + """ packages_str = " ".join([str(package) for package in packages_to_install]) script_for_python_packages = textwrap.dedent( @@ -266,73 +119,184 @@ def get_script_for_python_packages(packages_to_install, pip_index_url): def get_pod_template_spec( - func: Callable, - parameters: Dict[str, Any], - base_image: str, - container_name: str, - packages_to_install: List[str], - pip_index_url: str, + job_kind: str, + base_image: Optional[str] = None, + train_func: Optional[Callable] = None, + parameters: Optional[Dict[str, Any]] = None, + packages_to_install: Optional[List[str]] = None, + pip_index_url: str = constants.DEFAULT_PIP_INDEX_URL, ): """ - Get Pod template spec from the given function and input parameters. + Get Pod template spec for the given function and base image. """ - # Check if function is callable. - if not callable(func): - raise ValueError( - f"Training function must be callable, got function type: {type(func)}" + # Assign the default base image. + # TODO (andreyvelich): Add base image for other Job kinds. + if base_image is None: + base_image = constants.JOB_PARAMETERS[job_kind]["base_image"] + + # Create Pod template spec. + pod_template_spec = models.V1PodTemplateSpec( + metadata=models.V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=models.V1PodSpec( + containers=[ + models.V1Container( + name=constants.JOB_PARAMETERS[job_kind]["container"], + image=base_image, + ) + ] + ), + ) + + # If Training function is set, convert function to container execution script. + if train_func is not None: + # Check if function is callable. + if not callable(train_func): + raise ValueError( + f"Training function must be callable, got function type: {type(train_func)}" + ) + + # Extract function implementation. + func_code = inspect.getsource(train_func) + + # Function might be defined in some indented scope (e.g. 
in another function). + # We need to dedent the function code. + func_code = textwrap.dedent(func_code) + + # Wrap function code to execute it from the file. For example: + # def train(parameters): + # print('Start Training...') + # train({'lr': 0.01}) + if parameters is None: + func_code = f"{func_code}\n{train_func.__name__}()\n" + else: + func_code = f"{func_code}\n{train_func.__name__}({parameters})\n" + + # Prepare execute script template. + exec_script = textwrap.dedent( + """ + program_path=$(mktemp -d) + read -r -d '' SCRIPT << EOM\n + {func_code} + EOM + printf "%s" \"$SCRIPT\" > \"$program_path/ephemeral_script.py\" + python3 -u \"$program_path/ephemeral_script.py\"""" ) - # Extract function implementation. - func_code = inspect.getsource(func) + # Add function code to the execute script. + exec_script = exec_script.format(func_code=func_code) - # Function might be defined in some indented scope (e.g. in another function). - # We need to dedent the function code. - func_code = textwrap.dedent(func_code) + # Install Python packages if that is required. + if packages_to_install is not None: + exec_script = ( + get_script_for_python_packages(packages_to_install, pip_index_url) + + exec_script + ) - # Wrap function code to execute it from the file. For example: - # def train(parameters): - # print('Start Training...') - # train({'lr': 0.01}) - if parameters is None: - func_code = f"{func_code}\n{func.__name__}()\n" - else: - func_code = f"{func_code}\n{func.__name__}({parameters})\n" + # Add execution script to container arguments. + pod_template_spec.spec.containers[0].command = ["bash", "-c"] + pod_template_spec.spec.containers[0].args = [exec_script] - # Prepare execute script template. - exec_script = textwrap.dedent( - """ - program_path=$(mktemp -d) - read -r -d '' SCRIPT << EOM\n - {func_code} - EOM - printf "%s" "$SCRIPT" > $program_path/ephemeral_script.py - python3 -u $program_path/ephemeral_script.py""" + return pod_template_spec + + +def get_tfjob_template( + name: str, + namespace: str, + pod_template_spec: models.V1PodTemplateSpec, + num_worker_replicas: Optional[int] = None, + num_chief_replicas: Optional[int] = None, + num_ps_replicas: Optional[int] = None, +): + # Check if at least one replica is set. + # TODO (andreyvelich): Remove this check once we have CEL validation. + # Ref: https://github.com/kubeflow/training-operator/issues/1708 + if ( + num_worker_replicas is None + and num_chief_replicas is None + and num_ps_replicas is None + ): + raise ValueError("At least one replica for TFJob must be set") + + # Create TFJob template. + tfjob = models.KubeflowOrgV1TFJob( + api_version=constants.API_VERSION, + kind=constants.TFJOB_KIND, + metadata=models.V1ObjectMeta(name=name, namespace=namespace), + spec=models.KubeflowOrgV1TFJobSpec( + run_policy=models.KubeflowOrgV1RunPolicy(clean_pod_policy=None), + tf_replica_specs={}, + ), ) - # Add function code to the execute script. - exec_script = exec_script.format(func_code=func_code) + # Add Chief, PS, and Worker replicas to the TFJob. + if num_chief_replicas is not None: + tfjob.spec.tf_replica_specs[ + constants.REPLICA_TYPE_CHIEF + ] = models.KubeflowOrgV1ReplicaSpec( + replicas=num_chief_replicas, + template=pod_template_spec, + ) - # Install Python packages if that is required. 
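Putting the new helpers together, a sketch of a from-func TFJob (module paths, names, and the training function are illustrative; get_pod_template_spec is defined above and get_tfjob_template continues just below):

    from kubeflow.training.constants import constants
    from kubeflow.training.utils import utils

    def train(parameters):
        print(f"Training with lr={parameters['lr']}")

    # Wraps train() into the ephemeral-script container shown above.
    pod_template = utils.get_pod_template_spec(
        job_kind=constants.TFJOB_KIND,     # resolves container name and base image
        train_func=train,
        parameters={"lr": 0.01},
        packages_to_install=["numpy"],     # pip-installed before the script runs
    )
    tfjob = utils.get_tfjob_template(
        name="tf-from-func",
        namespace="default",
        pod_template_spec=pod_template,
        num_worker_replicas=2,
    )
    training_client.create_job(job=tfjob, namespace="default")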
- if packages_to_install is not None: - exec_script = ( - get_script_for_python_packages(packages_to_install, pip_index_url) - + exec_script + if num_ps_replicas is not None: + tfjob.spec.tf_replica_specs[ + constants.REPLICA_TYPE_PS + ] = models.KubeflowOrgV1ReplicaSpec( + replicas=num_ps_replicas, + template=pod_template_spec, ) - # Create Pod template spec. - pod_template_spec = client.V1PodTemplateSpec( - metadata=client.V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=client.V1PodSpec( - containers=[ - client.V1Container( - name=container_name, - image=base_image, - command=["bash", "-c"], - args=[exec_script], - ) - ] + if num_worker_replicas is not None: + tfjob.spec.tf_replica_specs[ + constants.REPLICA_TYPE_WORKER + ] = models.KubeflowOrgV1ReplicaSpec( + replicas=num_worker_replicas, + template=pod_template_spec, + ) + + return tfjob + + +def get_pytorchjob_template( + name: str, + namespace: str, + pod_template_spec: models.V1PodTemplateSpec, + num_worker_replicas: Optional[int] = None, +): + # Check if at least one replica is set. + # TODO (andreyvelich): Remove this check once we have CEL validation. + # Ref: https://github.com/kubeflow/training-operator/issues/1708 + if num_worker_replicas is None: + raise ValueError("At least one Worker replica for PyTorchJob must be set") + + # Create PyTorchJob template. + pytorchjob = models.KubeflowOrgV1PyTorchJob( + api_version=constants.API_VERSION, + kind=constants.PYTORCHJOB_KIND, + metadata=models.V1ObjectMeta(name=name, namespace=namespace), + spec=models.KubeflowOrgV1PyTorchJobSpec( + run_policy=models.KubeflowOrgV1RunPolicy(clean_pod_policy=None), + pytorch_replica_specs={}, ), ) - return pod_template_spec + # Add Master and Worker replicas to the PyTorchJob. + pytorchjob.spec.pytorch_replica_specs[ + constants.REPLICA_TYPE_MASTER + ] = models.KubeflowOrgV1ReplicaSpec( + replicas=1, + template=pod_template_spec, + ) + + # If number of Worker replicas is 1, PyTorchJob uses only Master replica. 
+    if num_worker_replicas != 1:
+        pytorchjob.spec.pytorch_replica_specs[
+            constants.REPLICA_TYPE_WORKER
+        ] = models.KubeflowOrgV1ReplicaSpec(
+            replicas=num_worker_replicas,
+            template=pod_template_spec,
+        )
+
+    return pytorchjob
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index c837d447db..fc93bac67d 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -14,7 +14,13 @@
 
 import setuptools
 
-TESTS_REQUIRES = ["pytest", "pytest-tornasync", "mypy"]
+TESTS_REQUIRES = [
+    "pytest",
+    "pytest-tornasync",
+    "mypy",
+    "black==23.9.1",
+    "flake8==4.0.1",
+]
 
 REQUIRES = [
     "certifi>=14.05.14",
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
index 1371cb5374..756906f27f 100644
--- a/sdk/python/test/conftest.py
+++ b/sdk/python/test/conftest.py
@@ -1,5 +1,6 @@
 import pytest
 
+
 def pytest_addoption(parser):
     parser.addoption("--namespace", action="store", default="default")
 
diff --git a/sdk/python/test/e2e/constants.py b/sdk/python/test/e2e/constants.py
index 0eb28d72f8..04be27836f 100644
--- a/sdk/python/test/e2e/constants.py
+++ b/sdk/python/test/e2e/constants.py
@@ -17,7 +17,10 @@
 TEST_GANG_SCHEDULER_NAME_VOLCANO = "volcano"
 TEST_GANG_SCHEDULER_NAME_NONE = "none"
 
-GANG_SCHEDULERS = {TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS, TEST_GANG_SCHEDULER_NAME_VOLCANO}
+GANG_SCHEDULERS = {
+    TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS,
+    TEST_GANG_SCHEDULER_NAME_VOLCANO,
+}
 NONE_GANG_SCHEDULERS = {TEST_GANG_SCHEDULER_NAME_NONE, ""}
 
 DEFAULT_SCHEDULER_PLUGINS_NAME = "scheduler-plugins-scheduler"
diff --git a/sdk/python/test/e2e/test_e2e_mpijob.py b/sdk/python/test/e2e/test_e2e_mpijob.py
index aa34fde75e..abc2e78d50 100644
--- a/sdk/python/test/e2e/test_e2e_mpijob.py
+++ b/sdk/python/test/e2e/test_e2e_mpijob.py
@@ -15,7 +15,7 @@
 import os
 import logging
 import pytest
-from typing import Tuple
+from typing import Tuple, Optional
 
 from kubernetes.client import V1PodTemplateSpec
 from kubernetes.client import V1ObjectMeta
@@ -31,21 +31,22 @@
 from kubeflow.training import KubeflowOrgV1SchedulingPolicy
 from kubeflow.training.constants import constants
 
-from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name
+import test.e2e.utils as utils
 from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY
 from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS
 
 logging.basicConfig(format="%(message)s")
 logging.getLogger().setLevel(logging.INFO)
 
-TRAINING_CLIENT = TrainingClient()
+TRAINING_CLIENT = TrainingClient(job_kind=constants.MPIJOB_KIND)
 JOB_NAME = "mpijob-mxnet-ci-test"
 CONTAINER_NAME = "mpi"
-GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY)
+GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "")
 
 
 @pytest.mark.skipif(
-    GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling",
+    GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS,
+    reason="For gang-scheduling",
 )
 def test_sdk_e2e_with_gang_scheduling(job_namespace):
     launcher_container, worker_container = generate_containers()
@@ -54,11 +55,13 @@
         replicas=1,
         restart_policy="Never",
         template=V1PodTemplateSpec(
-            metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}),
+            metadata=V1ObjectMeta(
+                annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}
+            ),
             spec=V1PodSpec(
                 containers=[launcher_container],
-                scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME),
-            )
+                scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME),
+            ),
         ),
     )
 
@@ 
-66,45 +69,52 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="Never", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[worker_container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) - mpijob = generate_mpijob(launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - patched_mpijob = generate_mpijob(launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=2), job_namespace) + mpijob = generate_mpijob( + job_namespace, launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) + ) + patched_mpijob = generate_mpijob( + job_namespace, launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) + ) - TRAINING_CLIENT.create_mpijob(mpijob, job_namespace) - logging.info(f"List of created {constants.MPIJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mpijobs(job_namespace)) + TRAINING_CLIENT.create_job(job=mpijob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.MPIJOB_KIND, - ) + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MPIJob E2E fails. Exception: {e}") - TRAINING_CLIENT.patch_mpijob(patched_mpijob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.MPIJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mpijobs(job_namespace)) + TRAINING_CLIENT.update_job(patched_mpijob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.MPIJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MPIJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_mpijob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): launcher_container, worker_container = generate_containers() @@ -112,43 +122,51 @@ def test_sdk_e2e(job_namespace): launcher = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[launcher_container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[launcher_container]), + ), ) worker = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[worker_container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[worker_container]), + ), ) - mpijob = generate_mpijob(launcher, worker, job_namespace=job_namespace) + mpijob = generate_mpijob(job_namespace, launcher, worker) - TRAINING_CLIENT.create_mpijob(mpijob, job_namespace) - logging.info(f"List of created {constants.MPIJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mpijobs(job_namespace)) + TRAINING_CLIENT.create_job(job=mpijob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.MPIJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MPIJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_mpijob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_mpijob( + job_namespace: str, launcher: KubeflowOrgV1ReplicaSpec, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1MPIJob: return KubeflowOrgV1MPIJob( - api_version="kubeflow.org/v1", - kind="MPIJob", + api_version=constants.API_VERSION, + kind=constants.MPIJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1MPIJobSpec( slots_per_worker=1, diff --git a/sdk/python/test/e2e/test_e2e_mxjob.py b/sdk/python/test/e2e/test_e2e_mxjob.py index 7d0b36370e..d0db8de467 100644 --- a/sdk/python/test/e2e/test_e2e_mxjob.py +++ b/sdk/python/test/e2e/test_e2e_mxjob.py @@ -15,7 +15,7 @@ import os import logging import pytest -from typing import Tuple +from typing import Tuple, Optional from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta @@ -32,21 +32,22 @@ from kubeflow.training import KubeflowOrgV1SchedulingPolicy from kubeflow.training.constants import constants -from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name +import test.e2e.utils as utils from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS logging.basicConfig(format="%(message)s") logging.getLogger().setLevel(logging.INFO) -TRAINING_CLIENT = TrainingClient() +TRAINING_CLIENT = TrainingClient(job_kind=constants.MXJOB_KIND) JOB_NAME = "mxjob-mnist-ci-test" CONTAINER_NAME = "mxnet" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY) +GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") @pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling", + GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, + reason="For gang-scheduling", ) def test_sdk_e2e_with_gang_scheduling(job_namespace): worker_container, server_container, scheduler_container = generate_containers() @@ -55,11 +56,13 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="Never", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[worker_container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) @@ -67,11 +70,13 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="Never", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[server_container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) @@ -79,45 +84,60 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="Never", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + 
annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[scheduler_container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) - unschedulable_mxjob = generate_mxjob(scheduler, server, worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - schedulable_mxjob = generate_mxjob(scheduler, server, worker, KubeflowOrgV1SchedulingPolicy(min_available=3), job_namespace) - - TRAINING_CLIENT.create_mxjob(unschedulable_mxjob, job_namespace) - logging.info(f"List of created {constants.MXJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mxjobs(job_namespace)) - - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, + unschedulable_mxjob = generate_mxjob( job_namespace, - constants.MXJOB_KIND, + scheduler, + server, + worker, + KubeflowOrgV1SchedulingPolicy(min_available=10), ) - - TRAINING_CLIENT.patch_mxjob(schedulable_mxjob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.MXJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mxjobs(job_namespace)) - - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, + schedulable_mxjob = generate_mxjob( job_namespace, - constants.MXJOB_KIND, - CONTAINER_NAME, + scheduler, + server, + worker, + KubeflowOrgV1SchedulingPolicy(min_available=3), ) - TRAINING_CLIENT.delete_mxjob(JOB_NAME, job_namespace) + TRAINING_CLIENT.create_job(job=unschedulable_mxjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) + + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MXJob E2E fails. Exception: {e}") + + TRAINING_CLIENT.update_job(schedulable_mxjob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) + + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MXJob E2E fails. 
Exception: {e}") + + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): worker_container, server_container, scheduler_container = generate_containers() @@ -125,51 +145,63 @@ def test_sdk_e2e(job_namespace): worker = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[worker_container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[worker_container]), + ), ) server = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[server_container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[server_container]), + ), ) scheduler = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[scheduler_container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[scheduler_container]), + ), ) - mxjob = generate_mxjob(scheduler, server, worker, job_namespace=job_namespace) + mxjob = generate_mxjob(job_namespace, scheduler, server, worker) - TRAINING_CLIENT.create_mxjob(mxjob, job_namespace) - logging.info(f"List of created {constants.MXJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_mxjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=mxjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.MXJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"MXJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_mxjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_mxjob( + job_namespace: str, scheduler: KubeflowOrgV1ReplicaSpec, server: KubeflowOrgV1ReplicaSpec, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1MXJob: return KubeflowOrgV1MXJob( - api_version="kubeflow.org/v1", - kind="MXJob", + api_version=constants.API_VERSION, + kind=constants.MXJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1MXJobSpec( job_mode="MXTrain", diff --git a/sdk/python/test/e2e/test_e2e_paddlejob.py b/sdk/python/test/e2e/test_e2e_paddlejob.py index 8f138492fc..7b41d729e9 100644 --- a/sdk/python/test/e2e/test_e2e_paddlejob.py +++ b/sdk/python/test/e2e/test_e2e_paddlejob.py @@ -15,6 +15,7 @@ import os import logging import pytest +from typing import Optional from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta @@ -30,21 +31,22 @@ from kubeflow.training import KubeflowOrgV1SchedulingPolicy from kubeflow.training.constants import constants -from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name +import test.e2e.utils as utils from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS logging.basicConfig(format="%(message)s") logging.getLogger().setLevel(logging.INFO) -TRAINING_CLIENT = TrainingClient() +TRAINING_CLIENT = TrainingClient(job_kind=constants.PADDLEJOB_KIND) JOB_NAME = "paddlejob-cpu-ci-test" CONTAINER_NAME = "paddle" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY) +GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") @pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling", + GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, + reason="For gang-scheduling", ) def test_sdk_e2e_with_gang_scheduling(job_namespace): container = generate_container() @@ -53,45 +55,52 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=2, restart_policy="OnFailure", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), containers=[container], - ) + ), ), ) - unschedulable_paddlejob = generate_paddlejob(worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - schedulable_paddlejob = generate_paddlejob(worker, KubeflowOrgV1SchedulingPolicy(min_available=2), job_namespace) + unschedulable_paddlejob = generate_paddlejob( + job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) + ) + schedulable_paddlejob = generate_paddlejob( + job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) + ) - TRAINING_CLIENT.create_paddlejob(unschedulable_paddlejob, job_namespace) - logging.info(f"List of created {constants.PADDLEJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_paddlejobs(job_namespace)) + TRAINING_CLIENT.create_job(job=unschedulable_paddlejob, namespace=job_namespace) + logging.info(f"List of created 
{TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.PADDLEJOB_KIND, - ) + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PaddleJob E2E fails. Exception: {e}") - TRAINING_CLIENT.patch_paddlejob(schedulable_paddlejob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.PADDLEJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_paddlejobs(job_namespace)) + TRAINING_CLIENT.update_job(schedulable_paddlejob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.PADDLEJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PaddleJob E2E fails. Exception: {e}") - TRAINING_CLIENT.delete_paddlejob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): container = generate_container() @@ -99,35 +108,39 @@ def test_sdk_e2e(job_namespace): worker = KubeflowOrgV1ReplicaSpec( replicas=2, restart_policy="OnFailure", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) - paddlejob = generate_paddlejob(worker, job_namespace=job_namespace) + paddlejob = generate_paddlejob(job_namespace, worker) - TRAINING_CLIENT.create_paddlejob(paddlejob, job_namespace) - logging.info(f"List of created {constants.PADDLEJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_paddlejobs(job_namespace)) + TRAINING_CLIENT.create_job(job=paddlejob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.PADDLEJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PaddleJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_paddlejob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_paddlejob( + job_namespace: str, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1PaddleJob: return KubeflowOrgV1PaddleJob( - api_version="kubeflow.org/v1", - kind="PaddleJob", + api_version=constants.API_VERSION, + kind=constants.PADDLEJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1PaddleJobSpec( run_policy=KubeflowOrgV1RunPolicy( diff --git a/sdk/python/test/e2e/test_e2e_pytorchjob.py b/sdk/python/test/e2e/test_e2e_pytorchjob.py index 22a9b11b83..d7cc976f09 100644 --- a/sdk/python/test/e2e/test_e2e_pytorchjob.py +++ b/sdk/python/test/e2e/test_e2e_pytorchjob.py @@ -15,6 +15,7 @@ import os import logging import pytest +from typing import Optional from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta @@ -28,23 +29,24 @@ from kubeflow.training import KubeflowOrgV1PyTorchJobSpec from kubeflow.training import KubeflowOrgV1RunPolicy from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants +from kubeflow.training import constants -from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name +import test.e2e.utils as utils from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS logging.basicConfig(format="%(message)s") logging.getLogger().setLevel(logging.INFO) -TRAINING_CLIENT = TrainingClient() +TRAINING_CLIENT = TrainingClient(job_kind=constants.PYTORCHJOB_KIND) JOB_NAME = "pytorchjob-mnist-ci-test" CONTAINER_NAME = "pytorch" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY) +GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") @pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling", + GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, + reason="For gang-scheduling", ) def test_sdk_e2e_with_gang_scheduling(job_namespace): container = generate_container() @@ -53,11 +55,13 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="OnFailure", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), containers=[container], - ) + ), ), ) @@ -65,46 +69,55 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="OnFailure", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), containers=[container], - ) + ), ), ) - unschedulable_pytorchjob = generate_pytorchjob(master, worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - schedulable_pytorchjob = 
generate_pytorchjob(master, worker, KubeflowOrgV1SchedulingPolicy(min_available=2), job_namespace) - - TRAINING_CLIENT.create_pytorchjob(unschedulable_pytorchjob, job_namespace) - logging.info(f"List of created {constants.PYTORCHJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_pytorchjobs(job_namespace)) - - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, + unschedulable_pytorchjob = generate_pytorchjob( job_namespace, - constants.PYTORCHJOB_KIND, + master, + worker, + KubeflowOrgV1SchedulingPolicy(min_available=10), + ) + schedulable_pytorchjob = generate_pytorchjob( + job_namespace, master, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) ) - TRAINING_CLIENT.patch_pytorchjob(schedulable_pytorchjob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.PYTORCHJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_pytorchjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=unschedulable_pytorchjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.PYTORCHJOB_KIND, - CONTAINER_NAME, - timeout=900, - ) + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PyTorchJob E2E fails. Exception: {e}") + + TRAINING_CLIENT.update_job(schedulable_pytorchjob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - TRAINING_CLIENT.delete_pytorchjob(JOB_NAME, job_namespace) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") + + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): container = generate_container() @@ -112,44 +125,51 @@ def test_sdk_e2e(job_namespace): master = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="OnFailure", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) worker = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="OnFailure", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) - pytorchjob = generate_pytorchjob(master, worker, job_namespace=job_namespace) + pytorchjob = generate_pytorchjob(job_namespace, master, worker) - TRAINING_CLIENT.create_pytorchjob(pytorchjob, job_namespace) - logging.info(f"List of created {constants.PYTORCHJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_pytorchjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=pytorchjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.PYTORCHJOB_KIND, - CONTAINER_NAME, - timeout=900, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_pytorchjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_pytorchjob( + job_namespace: str, master: KubeflowOrgV1ReplicaSpec, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1PyTorchJob: return KubeflowOrgV1PyTorchJob( - api_version="kubeflow.org/v1", - kind="PyTorchJob", + api_version=constants.API_VERSION, + kind=constants.PYTORCHJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1PyTorchJobSpec( run_policy=KubeflowOrgV1RunPolicy( diff --git a/sdk/python/test/e2e/test_e2e_tfjob.py b/sdk/python/test/e2e/test_e2e_tfjob.py index 6eaa086a59..9f14cf1877 100644 --- a/sdk/python/test/e2e/test_e2e_tfjob.py +++ b/sdk/python/test/e2e/test_e2e_tfjob.py @@ -15,6 +15,7 @@ import os import logging import pytest +from typing import Optional from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta @@ -30,21 +31,22 @@ from kubeflow.training import KubeflowOrgV1SchedulingPolicy from kubeflow.training.constants import constants -from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name +import test.e2e.utils as utils from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS logging.basicConfig(format="%(message)s") logging.getLogger().setLevel(logging.INFO) -TRAINING_CLIENT = TrainingClient() +TRAINING_CLIENT = TrainingClient(job_kind=constants.TFJOB_KIND) JOB_NAME = "tfjob-mnist-ci-test" CONTAINER_NAME = "tensorflow" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY) +GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") @pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling", + GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, + reason="For gang-scheduling", ) def test_sdk_e2e_with_gang_scheduling(job_namespace): container = generate_container() @@ -53,45 +55,52 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="Never", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) - unschedulable_tfjob = generate_tfjob(worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - schedulable_tfjob = generate_tfjob(worker, KubeflowOrgV1SchedulingPolicy(min_available=1), job_namespace) + unschedulable_tfjob = generate_tfjob( + job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) + ) + schedulable_tfjob = generate_tfjob( + job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=1) + ) - TRAINING_CLIENT.create_tfjob(unschedulable_tfjob, job_namespace) - logging.info(f"List of created {constants.TFJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_tfjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=unschedulable_tfjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + 
logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.TFJOB_KIND, - ) + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"TFJob E2E fails. Exception: {e}") - TRAINING_CLIENT.patch_tfjob(schedulable_tfjob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.TFJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_tfjobs(job_namespace)) + TRAINING_CLIENT.update_job(schedulable_tfjob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.TFJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"TFJob E2E fails. Exception: {e}") - TRAINING_CLIENT.delete_tfjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): container = generate_container() @@ -99,31 +108,39 @@ def test_sdk_e2e(job_namespace): worker = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="Never", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) - tfjob = generate_tfjob(worker, job_namespace=job_namespace) + tfjob = generate_tfjob(job_namespace, worker) - TRAINING_CLIENT.create_tfjob(tfjob, job_namespace) - logging.info(f"List of created {constants.TFJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_tfjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=tfjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, JOB_NAME, job_namespace, constants.TFJOB_KIND, CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"TFJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_tfjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_tfjob( + job_namespace: str, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1TFJob: return KubeflowOrgV1TFJob( - api_version="kubeflow.org/v1", - kind="TFJob", + api_version=constants.API_VERSION, + kind=constants.TFJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1TFJobSpec( run_policy=KubeflowOrgV1RunPolicy( diff --git a/sdk/python/test/e2e/test_e2e_xgboostjob.py b/sdk/python/test/e2e/test_e2e_xgboostjob.py index 0f0542e909..1c334e2b78 100644 --- a/sdk/python/test/e2e/test_e2e_xgboostjob.py +++ b/sdk/python/test/e2e/test_e2e_xgboostjob.py @@ -15,6 +15,7 @@ import os import logging import pytest +from typing import Optional from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta @@ -30,21 +31,22 @@ from kubeflow.training import KubeflowOrgV1SchedulingPolicy from kubeflow.training.constants import constants -from test.e2e.utils import verify_job_e2e, verify_unschedulable_job_e2e, get_pod_spec_scheduler_name +import test.e2e.utils as utils from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS logging.basicConfig(format="%(message)s") logging.getLogger().setLevel(logging.INFO) -TRAINING_CLIENT = TrainingClient() +TRAINING_CLIENT = TrainingClient(job_kind=constants.XGBOOSTJOB_KIND) JOB_NAME = "xgboostjob-iris-ci-test" CONTAINER_NAME = "xgboost" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY) +GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") @pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, reason="For gang-scheduling", + GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, + reason="For gang-scheduling", ) def test_sdk_e2e_with_gang_scheduling(job_namespace): container = generate_container() @@ -53,11 +55,13 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="OnFailure", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) @@ -65,45 +69,52 @@ def test_sdk_e2e_with_gang_scheduling(job_namespace): replicas=1, restart_policy="OnFailure", template=V1PodTemplateSpec( - metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), spec=V1PodSpec( containers=[container], - scheduler_name=get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ) + scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), + ), ), ) - unschedulable_xgboostjob = generate_xgboostjob(master, worker, KubeflowOrgV1SchedulingPolicy(min_available=10), job_namespace) - schedulable_xgboostjob = generate_xgboostjob(master, worker, KubeflowOrgV1SchedulingPolicy(min_available=2), job_namespace) + unschedulable_xgboostjob = generate_xgboostjob( + job_namespace, master, worker, 
KubeflowOrgV1SchedulingPolicy(min_available=10) + ) + schedulable_xgboostjob = generate_xgboostjob( + job_namespace, master, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) + ) - TRAINING_CLIENT.create_xgboostjob(unschedulable_xgboostjob, job_namespace) - logging.info(f"List of created {constants.XGBOOSTJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_xgboostjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=unschedulable_xgboostjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_unschedulable_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.XGBOOSTJOB_KIND, - ) + try: + utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"XGBoostJob E2E fails. Exception: {e}") - TRAINING_CLIENT.patch_xgboostjob(schedulable_xgboostjob, JOB_NAME, job_namespace) - logging.info(f"List of patched {constants.XGBOOSTJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_xgboostjobs(job_namespace)) + TRAINING_CLIENT.update_job(schedulable_xgboostjob, JOB_NAME, job_namespace) + logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.XGBOOSTJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"XGBoostJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_xgboostjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) @pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, reason="For plain scheduling", + GANG_SCHEDULER_NAME in GANG_SCHEDULERS, + reason="For plain scheduling", ) def test_sdk_e2e(job_namespace): container = generate_container() @@ -111,43 +122,51 @@ def test_sdk_e2e(job_namespace): master = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="OnFailure", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) worker = KubeflowOrgV1ReplicaSpec( replicas=1, restart_policy="OnFailure", - template=V1PodTemplateSpec(metadata=V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), - spec=V1PodSpec(containers=[container])), + template=V1PodTemplateSpec( + metadata=V1ObjectMeta( + annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} + ), + spec=V1PodSpec(containers=[container]), + ), ) - xgboostjob = generate_xgboostjob(master, worker, job_namespace=job_namespace) + xgboostjob = generate_xgboostjob(job_namespace, master, worker) - TRAINING_CLIENT.create_xgboostjob(xgboostjob, job_namespace) - logging.info(f"List of created {constants.XGBOOSTJOB_KIND}s") - logging.info(TRAINING_CLIENT.list_xgboostjobs(job_namespace)) + TRAINING_CLIENT.create_job(job=xgboostjob, namespace=job_namespace) + logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") + logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - verify_job_e2e( - TRAINING_CLIENT, - JOB_NAME, - job_namespace, - constants.XGBOOSTJOB_KIND, - CONTAINER_NAME, - ) + try: + utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) + except Exception as e: + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) + raise Exception(f"XGBoostJob E2E fails. 
Exception: {e}") - TRAINING_CLIENT.delete_xgboostjob(JOB_NAME, job_namespace) + utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) + TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) def generate_xgboostjob( + job_namespace: str, master: KubeflowOrgV1ReplicaSpec, worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: KubeflowOrgV1SchedulingPolicy = None, - job_namespace: str = "default", + scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, ) -> KubeflowOrgV1XGBoostJob: return KubeflowOrgV1XGBoostJob( - api_version="kubeflow.org/v1", - kind="XGBoostJob", + api_version=constants.API_VERSION, + kind=constants.XGBOOSTJOB_KIND, metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), spec=KubeflowOrgV1XGBoostJobSpec( run_policy=KubeflowOrgV1RunPolicy( diff --git a/sdk/python/test/e2e/utils.py b/sdk/python/test/e2e/utils.py index 32994be79c..a52393bbfc 100644 --- a/sdk/python/test/e2e/utils.py +++ b/sdk/python/test/e2e/utils.py @@ -11,65 +11,57 @@ logging.getLogger().setLevel(logging.INFO) -def verify_unschedulable_job_e2e( - client: TrainingClient, name: str, namespace: str, job_kind: str -): +def verify_unschedulable_job_e2e(client: TrainingClient, name: str, namespace: str): """Verify unschedulable Training Job e2e test.""" - logging.info(f"\n\n\n{job_kind} is creating") - client.wait_for_job_conditions(name, namespace, job_kind, {constants.JOB_CONDITION_CREATED}) + logging.info(f"\n\n\n{client.job_kind} is creating") + job = client.wait_for_job_conditions( + name, namespace, expected_conditions={constants.JOB_CONDITION_CREATED} + ) logging.info("Checking 3 times that pods are not scheduled") for num in range(3): logging.info(f"Number of attempts: {int(num)+1}/3") - # Job should have a Created condition. - if not client.is_job_created(name, namespace, job_kind): - raise Exception(f"{job_kind} should be in Created condition") - # Job shouldn't have a Running condition. - if client.is_job_running(name, namespace, job_kind): - raise Exception(f"{job_kind} shouldn't be in Running condition") + # Job should have correct conditions + if not client.is_job_created(job=job) or client.is_job_running(job=job): + raise Exception( + f"{client.job_kind} should be in Created condition. " + f"{client.job_kind} should not be in Running condition." + ) logging.info("Sleeping 5 seconds...") time.sleep(5) def verify_job_e2e( - client: TrainingClient, name: str, namespace: str, job_kind: str, container: str, timeout: int = 600 + client: TrainingClient, + name: str, + namespace: str, + wait_timeout: int = 600, ): """Verify Training Job e2e test.""" # Wait until Job is Succeeded. - logging.info(f"\n\n\n{job_kind} is running") - client.wait_for_job_conditions(name, namespace, job_kind, timeout=timeout) + logging.info(f"\n\n\n{client.job_kind} is running") + job = client.wait_for_job_conditions(name, namespace, wait_timeout=wait_timeout) # Job should have Created, Running, and Succeeded conditions. - conditions = client.get_job_conditions(name, namespace, job_kind) + conditions = client.get_job_conditions(job=job) if len(conditions) != 3: - raise Exception(f"{job_kind} conditions are invalid: {conditions}") + raise Exception(f"{client.job_kind} conditions are invalid: {conditions}") # Job should have correct conditions. 
- if not client.is_job_created(name, namespace, job_kind): - raise Exception(f"{job_kind} should be in Created condition") - - if client.is_job_running(name, namespace, job_kind): - raise Exception(f"{job_kind} should not be in Running condition") - - if client.is_job_restarting(name, namespace, job_kind): - raise Exception(f"{job_kind} should not be in Restarting condition") - - if not client.is_job_succeeded(name, namespace, job_kind): - raise Exception(f"{job_kind} should be in Succeeded condition") - - if client.is_job_failed(name, namespace, job_kind): - raise Exception(f"{job_kind} should not be in Failed condition") - - # Print Job pod names. - logging.info(f"\n\n\n{job_kind} pod names") - logging.info(client.get_job_pod_names(name, namespace)) - - # Print Job logs. - logging.info(f"\n\n\n{job_kind} logs") - client.get_job_logs(name, namespace, container=container) + if ( + not client.is_job_created(job=job) + or not client.is_job_succeeded(job=job) + or client.is_job_running(job=job) + or client.is_job_restarting(job=job) + or client.is_job_failed(job=job) + ): + raise Exception( + f"{client.job_kind} should be in Succeeded and Created conditions. " + f"{client.job_kind} should not be in Running, Restarting, or Failed conditions." + ) def get_pod_spec_scheduler_name(gang_scheduler_name: str) -> str: @@ -79,3 +71,17 @@ def get_pod_spec_scheduler_name(gang_scheduler_name: str) -> str: return TEST_GANG_SCHEDULER_NAME_VOLCANO return "" + + +def print_job_results(client: TrainingClient, name: str, namespace: str): + # Print Job. + logging.info(f"\n\n\n{client.job_kind} info") + logging.info(client.get_job(name, namespace)) + + # Print Job pod names. + logging.info(f"\n\n\n{client.job_kind} pod names") + logging.info(client.get_job_pod_names(name, namespace)) + + # Print Job logs. + logging.info(f"\n\n\n{client.job_kind} logs") + client.get_job_logs(name, namespace)
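
Taken together, these hunks migrate every per-kind call (create_pytorchjob, list_tfjobs, patch_paddlejob, delete_xgboostjob, and so on) to the consolidated Job APIs on TrainingClient, with the Job kind bound once at construction. A minimal sketch of the new calling convention as the tests above exercise it; the `pytorchjob` object, job name, namespace, and timeout are illustrative placeholders, not values mandated by the SDK:

    from kubeflow.training import TrainingClient
    from kubeflow.training.constants import constants

    # Bind the Job kind once instead of selecting a per-kind method per call.
    client = TrainingClient(job_kind=constants.PYTORCHJOB_KIND)

    # `pytorchjob` is a KubeflowOrgV1PyTorchJob built elsewhere,
    # e.g. by a generate_pytorchjob() helper like the ones in these tests.
    client.create_job(job=pytorchjob, namespace="default")  # was create_pytorchjob
    print(client.list_jobs("default"))                      # was list_pytorchjobs
    client.update_job(pytorchjob, "pytorchjob-mnist-ci-test", "default")  # was patch_pytorchjob
    job = client.wait_for_job_conditions(
        "pytorchjob-mnist-ci-test", "default", wait_timeout=900  # renamed from timeout
    )
    client.delete_job("pytorchjob-mnist-ci-test", "default")  # was delete_pytorchjob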
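
The status helpers follow the same consolidation: wait_for_job_conditions now returns the Job object it observed, and is_job_created, is_job_running, is_job_succeeded, is_job_restarting, is_job_failed, and get_job_conditions all accept it via job=, so condition checks need no extra reads of the resource. A sketch under the same placeholder assumptions as above:

    # The returned Job carries the conditions that were just verified.
    job = client.wait_for_job_conditions(
        "pytorchjob-mnist-ci-test",
        "default",
        expected_conditions={constants.JOB_CONDITION_CREATED},
    )

    # Pass the fetched object directly; no further API round-trips are needed.
    if not client.is_job_created(job=job) or client.is_job_running(job=job):
        raise Exception(f"{client.job_kind} should be Created and not yet Running")

    print(client.get_job_conditions(job=job))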
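
Two smaller conventions ride along with the rename: the generate_* helpers now take job_namespace as the first positional argument and type the trailing scheduling_policy as Optional[KubeflowOrgV1SchedulingPolicy] = None, matching how the tests build schedulable and unschedulable variants of the same Job; and GANG_SCHEDULER_NAME falls back to "" via os.getenv(..., ""), presumably so the skipif membership checks and get_pod_spec_scheduler_name always receive a string rather than None when the environment variable is unset.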