diff --git a/build/images/tf_operator/Dockerfile b/build/images/tf_operator/Dockerfile
deleted file mode 100644
index b83f31d2a7..0000000000
--- a/build/images/tf_operator/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-FROM golang:1.13.5 AS build-image
-
-ADD . /go/src/github.com/kubeflow/tf-operator
-
-WORKDIR /go/src/github.com/kubeflow/tf-operator
-
-RUN if [ "$(uname -m)" = "aarch64" ]; then \
-        GO111MODULE="on" CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o tf-operator.v1 ./cmd/tf-operator.v1; \
-    else \
-        GO111MODULE="on" CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o tf-operator.v1 ./cmd/tf-operator.v1; \
-    fi
-
-FROM gcr.io/distroless/base-debian10
-
-COPY third_party/library/license.txt /opt/license.txt
-
-COPY --from=build-image /go/src/github.com/kubeflow/tf-operator/tf-operator.v1 /opt/
-
-ENTRYPOINT ["/opt/tf-operator.v1"]
diff --git a/cmd/tf-operator.v1/app/options/options.go b/cmd/tf-operator.v1/app/options/options.go
deleted file mode 100644
index 433f94d03d..0000000000
--- a/cmd/tf-operator.v1/app/options/options.go
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package options
-
-import (
-	"flag"
-	"time"
-
-	v1 "k8s.io/api/core/v1"
-)
-
-const DefaultResyncPeriod = 12 * time.Hour
-
-// ServerOption is the main context object for the controller manager.
-type ServerOption struct {
-	Kubeconfig           string
-	MasterURL            string
-	Threadiness          int
-	PrintVersion         bool
-	JSONLogFormat        bool
-	EnableGangScheduling bool
-	GangSchedulerName    string
-	Namespace            string
-	MonitoringPort       int
-	ResyncPeriod         time.Duration
-	// QPS indicates the maximum QPS to the master from this client.
-	// If it's zero, the created RESTClient will use DefaultQPS: 5
-	QPS int
-	// Maximum burst for throttle.
-	// If it's zero, the created RESTClient will use DefaultBurst: 10.
-	Burst int
-}
-
-// NewServerOption creates a new CMServer with a default config.
-func NewServerOption() *ServerOption {
-	s := ServerOption{}
-	return &s
-}
-
-// AddFlags adds flags for a specific CMServer to the specified FlagSet.
-func (s *ServerOption) AddFlags(fs *flag.FlagSet) {
-	//fs.StringVar(&s.Kubeconfig, "kubeconfig", "", "The path of kubeconfig file")
-
-	fs.StringVar(&s.MasterURL, "master", "",
-		`The url of the Kubernetes API server,
-		 will overrides any value in kubeconfig, only required if out-of-cluster.`)
-
-	fs.StringVar(&s.Namespace, "namespace", v1.NamespaceAll,
-		`The namespace to monitor tfjobs. If unset, it monitors all namespaces cluster-wide.
-                If set, it only monitors tfjobs in the given namespace.`)
-
-	fs.IntVar(&s.Threadiness, "threadiness", 1,
-		`How many threads to process the main logic`)
-
-	fs.BoolVar(&s.PrintVersion, "version", false, "Show version and quit")
-
-	fs.BoolVar(&s.JSONLogFormat, "json-log-format", true,
-		"Set true to use json style log format. Set false to use plaintext style log format")
-
-	fs.BoolVar(&s.EnableGangScheduling, "enable-gang-scheduling", false, "Set true to enable gang scheduling")
-	fs.StringVar(&s.GangSchedulerName, "gang-scheduler-name", "volcano", "The scheduler to gang-schedule tfjobs, defaults to volcano")
-
-	fs.IntVar(&s.MonitoringPort, "monitoring-port", 8443,
-		`Endpoint port for displaying monitoring metrics. 
-It can be set to "0" to disable the metrics serving.`)
-
-	fs.DurationVar(&s.ResyncPeriod, "resyc-period", DefaultResyncPeriod, "Resync interval of the tf-operator")
-
-	fs.IntVar(&s.QPS, "qps", 5, "QPS indicates the maximum QPS to the master from this client.")
-	fs.IntVar(&s.Burst, "burst", 10, "Maximum burst for throttle.")
-}
diff --git a/cmd/tf-operator.v1/app/server.go b/cmd/tf-operator.v1/app/server.go
deleted file mode 100644
index 2bb6d6ef39..0000000000
--- a/cmd/tf-operator.v1/app/server.go
+++ /dev/null
@@ -1,251 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package app
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"time"
-
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-	log "github.com/sirupsen/logrus"
-	corev1 "k8s.io/api/core/v1"
-	apiextensionclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
-	"k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/util/uuid"
-	kubeinformers "k8s.io/client-go/informers"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	restclientset "k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/clientcmd"
-	election "k8s.io/client-go/tools/leaderelection"
-	"k8s.io/client-go/tools/leaderelection/resourcelock"
-	"k8s.io/client-go/tools/record"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	"github.com/kubeflow/common/pkg/util/signals"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	"github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme"
-	tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
-	"github.com/kubeflow/tf-operator/pkg/common"
-	controller "github.com/kubeflow/tf-operator/pkg/controller.v1/tensorflow"
-	"github.com/kubeflow/tf-operator/pkg/version"
-)
-
-const (
-	apiVersion = "v1"
-)
-
-var (
-	// leader election config
-	leaseDuration = 15 * time.Second
-	renewDuration = 5 * time.Second
-	retryPeriod   = 3 * time.Second
-)
-
-// RecommendedKubeConfigPathEnv is the environment variable name for kubeconfig.
-const RecommendedKubeConfigPathEnv = "KUBECONFIG"
-
-var (
-	isLeader = promauto.NewGauge(prometheus.GaugeOpts{
-		Name: "tf_operator_is_leader",
-		Help: "Is this client the leader of this tf-operator client set?",
-	})
-)
-
-// Run runs the server.
-func Run(opt *options.ServerOption) error {
-	// Check if the -version flag was passed and, if so, print the version and exit.
-	if opt.PrintVersion {
-		version.PrintVersionAndExit(apiVersion)
-	}
-
-	namespace := os.Getenv(common.EnvKubeflowNamespace)
-	if len(namespace) == 0 {
-		log.Infof("EnvKubeflowNamespace not set, use default namespace %s",
-			metav1.NamespaceDefault)
-		namespace = metav1.NamespaceDefault
-	}
-	if opt.Namespace == corev1.NamespaceAll {
-		log.Info("Using cluster scoped operator")
-	} else {
-		log.Infof("Scoping operator to namespace %s", opt.Namespace)
-	}
-
-	// To help debugging, immediately log version.
-	log.Infof("%+v", version.Info(apiVersion))
-
-	// Set up signals so we handle the first shutdown signal gracefully.
-	stopCh := signals.SetupSignalHandler()
-
-	// Note: ENV KUBECONFIG will overwrite user defined Kubeconfig option.
-	if len(os.Getenv(RecommendedKubeConfigPathEnv)) > 0 {
-		// use the current context in kubeconfig
-		// This is very useful for running locally.
-		opt.Kubeconfig = os.Getenv(RecommendedKubeConfigPathEnv)
-	}
-
-	// Get kubernetes config.
-	kcfg, err := clientcmd.BuildConfigFromFlags(opt.MasterURL, opt.Kubeconfig)
-	if err != nil {
-		log.Fatalf("Error building kubeconfig: %s", err.Error())
-	}
-
-	// Set client qps and burst by opt.
-	kcfg.QPS = float32(opt.QPS)
-	kcfg.Burst = opt.Burst
-	log.Infof(
-		"Creating client sets and informers with QPS %d, burst %d, resync period %s",
-		opt.QPS, opt.Burst, opt.ResyncPeriod.String())
-
-	// Create clients.
-	kubeClientSet, leaderElectionClientSet,
-		apiextensionClientSet, tfJobClientSet,
-		volcanoClientSet, err := createClientSets(kcfg)
-	if err != nil {
-		log.Fatalf("Error create client set : %s", err.Error())
-		return err
-	}
-	if !checkCRDExists(apiextensionClientSet, opt.Namespace) {
-		return fmt.Errorf("Failed to get the expected TFJobs with API version %s",
-			tfJobClientSet.KubeflowV1().RESTClient().APIVersion())
-	}
-	// Create informer factory.
-	kubeInformerFactory := kubeinformers.NewFilteredSharedInformerFactory(kubeClientSet, opt.ResyncPeriod, opt.Namespace, nil)
-	tfJobInformerFactory := tfjobinformers.NewSharedInformerFactory(tfJobClientSet, opt.ResyncPeriod)
-
-	unstructuredInformer := controller.NewUnstructuredTFJobInformer(
-		kcfg, opt.Namespace, opt.ResyncPeriod)
-
-	// Create tf controller.
-	tc := controller.NewTFController(unstructuredInformer, kubeClientSet, volcanoClientSet, tfJobClientSet, kubeInformerFactory, tfJobInformerFactory, *opt)
-
-	// Start informer goroutines.
-	go kubeInformerFactory.Start(stopCh)
-
-	// We do not use the generated informer because of
-	// https://github.com/kubeflow/tf-operator/issues/561
-	// go tfJobInformerFactory.Start(stopCh)
-	go unstructuredInformer.Informer().Run(stopCh)
-
-	// Set leader election start function.
-	run := func(context.Context) {
-		isLeader.Set(1)
-		if err := tc.Run(opt.Threadiness, stopCh); err != nil {
-			log.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-
-	id, err := os.Hostname()
-	if err != nil {
-		return fmt.Errorf("failed to get hostname: %v", err)
-	}
-	// add a uniquifier so that two processes on the same host don't accidentally both become active
-	id = id + "_" + string(uuid.NewUUID())
-
-	// Prepare event clients.
-	eventBroadcaster := record.NewBroadcaster()
-	if err = corev1.AddToScheme(scheme.Scheme); err != nil {
-		return fmt.Errorf("CoreV1 Add Scheme failed: %v", err)
-	}
-	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "tf-operator"})
-
-	rl := &resourcelock.EndpointsLock{
-		EndpointsMeta: metav1.ObjectMeta{
-			Namespace: namespace,
-			Name:      "tf-operator",
-		},
-		Client: leaderElectionClientSet.CoreV1(),
-		LockConfig: resourcelock.ResourceLockConfig{
-			Identity:      id,
-			EventRecorder: recorder,
-		},
-	}
-
-	// Start leader election.
-	election.RunOrDie(context.TODO(), election.LeaderElectionConfig{
-		Lock:          rl,
-		LeaseDuration: leaseDuration,
-		RenewDeadline: renewDuration,
-		RetryPeriod:   retryPeriod,
-		Callbacks: election.LeaderCallbacks{
-			OnStartedLeading: run,
-			OnStoppedLeading: func() {
-				isLeader.Set(0)
-				log.Fatalf("leader election lost")
-			},
-		},
-	})
-
-	return nil
-}
-
-func createClientSets(config *restclientset.Config) (
-	kubeclientset.Interface, kubeclientset.Interface,
-	apiextensionclientset.Interface, tfjobclientset.Interface,
-	volcanoclient.Interface, error) {
-
-	kubeClientSet, err := kubeclientset.NewForConfig(restclientset.AddUserAgent(config, "tf-operator"))
-	if err != nil {
-		return nil, nil, nil, nil, nil, err
-	}
-
-	leaderElectionClientSet, err := kubeclientset.NewForConfig(restclientset.AddUserAgent(config, "leader-election"))
-	if err != nil {
-		return nil, nil, nil, nil, nil, err
-	}
-
-	apiextensionClientSet, err := apiextensionclientset.NewForConfig(restclientset.AddUserAgent(config, "leader-election"))
-	if err != nil {
-		return nil, nil, nil, nil, nil, err
-	}
-
-	tfJobClientSet, err := tfjobclientset.NewForConfig(config)
-	if err != nil {
-		return nil, nil, nil, nil, nil, err
-	}
-
-	volcanoClientSet, err := volcanoclient.NewForConfig(restclientset.AddUserAgent(config, "volcano"))
-	if err != nil {
-		return nil, nil, nil, nil, nil, err
-	}
-
-	return kubeClientSet, leaderElectionClientSet, apiextensionClientSet, tfJobClientSet, volcanoClientSet, nil
-}
-
-// checkCRDExists checks if the CRD exists.
-func checkCRDExists(clientset apiextensionclientset.Interface, namespace string) bool {
-	crd, err := clientset.ApiextensionsV1beta1().
-		CustomResourceDefinitions().
-		Get(context.TODO(), "tfjobs.kubeflow.org", metav1.GetOptions{})
-
-	if err != nil {
-		log.Error(err)
-		if _, ok := err.(*errors.StatusError); ok {
-			if errors.IsNotFound(err) {
-				return false
-			}
-		} else {
-			return false
-		}
-	}
-
-	log.Infof("CRD %s/%s %s is registered",
-		crd.Spec.Group, crd.Spec.Version, crd.Spec.Names.Singular)
-	return true
-}
diff --git a/cmd/tf-operator.v1/main.go b/cmd/tf-operator.v1/main.go
deleted file mode 100644
index 39f154cecc..0000000000
--- a/cmd/tf-operator.v1/main.go
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//       http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"net/http"
-	_ "net/http/pprof"
-	"strconv"
-
-	"github.com/onrik/logrus/filename"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
-	log "github.com/sirupsen/logrus"
-
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-)
-
-func init() {
-	// Add filename as one of the fields of the structured log message.
-	filenameHook := filename.NewHook()
-	filenameHook.Field = "filename"
-	log.AddHook(filenameHook)
-}
-
-func startMonitoring(monitoringPort int) {
-	if monitoringPort != 0 {
-		go func() {
-			log.Infof("Setting up client for monitoring on port: %s", strconv.Itoa(monitoringPort))
-			http.Handle("/metrics", promhttp.Handler())
-			err := http.ListenAndServe(fmt.Sprintf(":%s", strconv.Itoa(monitoringPort)), nil)
-			if err != nil {
-				log.Error("Monitoring endpoint setup failure.", err)
-			}
-		}()
-	}
-}
-
-func main() {
-	s := options.NewServerOption()
-	s.AddFlags(flag.CommandLine)
-
-	flag.Parse()
-
-	if s.JSONLogFormat {
-		// Output logs in a json format so that it can be parsed by services like Stackdriver.
-		log.SetFormatter(&log.JSONFormatter{})
-	}
-
-	startMonitoring(s.MonitoringPort)
-
-	if err := app.Run(s); err != nil {
-		log.Fatalf("Failed to run: %v", err)
-	}
-}
diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md
index 2196fa6d34..34eda7cc50 100644
--- a/docs/development/developer_guide.md
+++ b/docs/development/developer_guide.md
@@ -22,7 +22,7 @@ GO111MODULE="on" go mod vendor
 Build it
 
 ```sh
-go install github.com/kubeflow/tf-operator/cmd/tf-operator.v1
+go install github.com/kubeflow/tf-operator/cmd/training-operator.v1
 ```
 
 ## Running the Operator Locally
@@ -57,7 +57,7 @@ export KUBEFLOW_NAMESPACE=$(your_namespace)
 After the cluster is up, the TFJob CRD should be created on the cluster.
 
 ```bash
-kubectl create -f ./examples/crd/crd-v1.yaml
+make install
 ```
 
 ### Run Operator
@@ -65,7 +65,7 @@ kubectl create -f ./examples/crd/crd-v1.yaml
 Now we are ready to run operator locally:
 
 ```sh
-tf-operator
+make run
 ```
 
 To verify local operator is working, create an example job and you should see jobs created by it.
diff --git a/pkg/controller.v1/mxnet/mxjob_controller.go b/pkg/controller.v1/mxnet/mxjob_controller.go
index 970699f09d..888569575b 100644
--- a/pkg/controller.v1/mxnet/mxjob_controller.go
+++ b/pkg/controller.v1/mxnet/mxjob_controller.go
@@ -57,7 +57,7 @@ import (
 )
 
 const (
-	controllerName = "mxnet-operator"
+	controllerName = "mxjob-controller"
 
 	// mxJobCreatedReason is added in a mxjob when it is created.
 	mxJobCreatedReason = "MXJobCreated"
@@ -73,7 +73,7 @@ const (
 
 var (
 	jobOwnerKey = ".metadata.controller"
-	// DefaultMXControllerConfiguration is the suggested mxnet-operator configuration for production.
+	// DefaultMXControllerConfiguration is the suggested mxnetjob-controller configuration for production.
 	DefaultMXControllerConfiguration = common.JobControllerConfiguration{
 		ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 15 * time.Second},
 		EnableGangScheduling:     false,
diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller.go b/pkg/controller.v1/pytorch/pytorchjob_controller.go
index a1b8bd6a22..b52abd359c 100644
--- a/pkg/controller.v1/pytorch/pytorchjob_controller.go
+++ b/pkg/controller.v1/pytorch/pytorchjob_controller.go
@@ -56,7 +56,7 @@ import (
 )
 
 const (
-	controllerName = "pytorchjob-operator"
+	controllerName = "pytorchjob-controller"
 )
 
 var (
diff --git a/pkg/controller.v1/tensorflow/controller.go b/pkg/controller.v1/tensorflow/controller.go
deleted file mode 100644
index 4e6285a6a3..0000000000
--- a/pkg/controller.v1/tensorflow/controller.go
+++ /dev/null
@@ -1,409 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package tensorflow provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"context"
-	"fmt"
-	"time"
-
-	"github.com/kubeflow/tf-operator/pkg/common/util"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-
-	log "github.com/sirupsen/logrus"
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	"k8s.io/apimachinery/pkg/util/wait"
-	kubeinformers "k8s.io/client-go/informers"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/kubernetes/scheme"
-	"k8s.io/client-go/tools/cache"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/common"
-	tflogger "github.com/kubeflow/common/pkg/util"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	tfjobscheme "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme"
-	tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
-	tfjobinformersv1 "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/tensorflow/v1"
-	tfjoblisters "github.com/kubeflow/tf-operator/pkg/client/listers/tensorflow/v1"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-)
-
-const (
-	controllerName = "tf-operator"
-
-	// labels for pods and servers.
-	tfReplicaTypeLabel  = "replica-type"
-	tfReplicaIndexLabel = "replica-index"
-	labelGroupName      = "group-name"
-	// Deprecated label for backwards compatibility. Has to be removed
-	labelTFJobName = "tf-job-name"
-	// volcanoTaskSpecKey task spec key used in pod annotation when EnableGangScheduling is true
-	volcanoTaskSpecKey = "volcano.sh/task-spec"
-)
-
-var (
-	// KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc.
-	// IndexerInformer uses a delta queue, therefore for deletes we have to use this
-	// key function but it should be just fine for non delete events.
-	KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
-
-	tfJobsDeletedCount = promauto.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "tf_operator_jobs_deleted_total",
-			Help: "Counts number of TF jobs deleted",
-		},
-		[]string{"job_namespace"},
-	)
-)
-
-// TFController is the type for TFJob Controller, which manages
-// the lifecycle of TFJobs.
-type TFController struct {
-	common.JobController
-
-	// tfJobClientSet is a clientset for CRD TFJob.
-	tfJobClientSet tfjobclientset.Interface
-
-	// To allow injection of sync functions for testing.
-	syncHandler func(string) (bool, error)
-
-	// tfJobInformer is a temporary field for unstructured informer support.
-	tfJobInformer cache.SharedIndexInformer
-
-	// Listers for TFJob, Pod and Service
-	// tfJobLister can list/get tfjobs from the shared informer's store.
-	tfJobLister tfjoblisters.TFJobLister
-
-	// tfJobInformerSynced returns true if the tfjob store has been synced at least once.
-	tfJobInformerSynced cache.InformerSynced
-}
-
-// NewTFController returns a new TFJob controller.
-func NewTFController(
-	// This variable is for unstructured informer.
-	tfJobInformer tfjobinformersv1.TFJobInformer,
-	kubeClientSet kubeclientset.Interface,
-	volcanoClientSet volcanoclient.Interface,
-	tfJobClientSet tfjobclientset.Interface,
-	kubeInformerFactory kubeinformers.SharedInformerFactory,
-	// This field is not used now but we keep it since it will be used
-	// after we support CRD validation.
-	tfJobInformerFactory tfjobinformers.SharedInformerFactory,
-	option options.ServerOption) *TFController {
-
-	err := tfjobscheme.AddToScheme(scheme.Scheme)
-	if err != nil {
-		log.Fatalf("Failed to add tfjob scheme: %v", err)
-	}
-
-	log.Info("Creating TFJob controller")
-	// Create new TFController.
-	tc := &TFController{
-		tfJobClientSet: tfJobClientSet,
-	}
-
-	// Create base controller
-	log.Info("Creating Job controller")
-
-	jc := common.NewJobController(tc, metav1.Duration{Duration: 15 * time.Second},
-		option.EnableGangScheduling, kubeClientSet, volcanoClientSet, kubeInformerFactory, tfv1.Plural)
-
-	// Set sync handler.
-	tc.syncHandler = tc.syncTFJob
-
-	// TODO(ChanYiLin): these are originally for testing, but with using common library,
-	// we can not replcae the function. Also need to update or remove some tests
-
-	// tc.updateStatusHandler = tc.UpdateJobStatusInApiServer
-	// set delete handler.
-	// tc.deleteTFJobHandler = tc.DeleteJob
-
-	// Set up an event handler for when tfjob resources change.
-	tfJobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc:    tc.addTFJob,
-		UpdateFunc: tc.updateTFJob,
-		// This will enter the sync loop and no-op,
-		// because the tfjob has been deleted from the store.
-		DeleteFunc: tc.enqueueTFJob,
-	})
-
-	tc.tfJobInformer = tfJobInformer.Informer()
-	tc.tfJobLister = tfJobInformer.Lister()
-	tc.tfJobInformerSynced = tfJobInformer.Informer().HasSynced
-
-	// Create pod informer.
-	podInformer := kubeInformerFactory.Core().V1().Pods()
-
-	// Set up an event handler for when pod resources change
-	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc:    jc.AddPod,
-		UpdateFunc: jc.UpdatePod,
-		DeleteFunc: jc.DeletePod,
-	})
-
-	// tc.PodLister = podInformer.Lister()
-	// tc.PodInformerSynced = podInformer.Informer().HasSynced
-	jc.PodLister = podInformer.Lister()
-	jc.PodInformerSynced = podInformer.Informer().HasSynced
-
-	// Create service informer.
-	serviceInformer := kubeInformerFactory.Core().V1().Services()
-
-	// Set up an event handler for when service resources change.
-	serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc:    jc.AddService,
-		UpdateFunc: jc.UpdateService,
-		DeleteFunc: jc.DeleteService,
-	})
-
-	// tc.ServiceLister = serviceInformer.Lister()
-	// tc.ServiceInformerSynced = serviceInformer.Informer().HasSynced
-	jc.ServiceLister = serviceInformer.Lister()
-	jc.ServiceInformerSynced = serviceInformer.Informer().HasSynced
-
-	tc.JobController = jc
-
-	return tc
-}
-
-// Run will set up the event handlers for types we are interested in, as well
-// as syncing informer caches and starting workers. It will block until stopCh
-// is closed, at which point it will shutdown the workqueue and wait for
-// workers to finish processing their current work items.
-func (tc *TFController) Run(threadiness int, stopCh <-chan struct{}) error {
-	defer utilruntime.HandleCrash()
-	defer tc.WorkQueue.ShutDown()
-
-	// Start the informer factories to begin populating the informer caches.
-	log.Info("Starting TFJob controller")
-
-	// Wait for the caches to be synced before starting workers.
-	log.Info("Waiting for informer caches to sync")
-
-	if ok := cache.WaitForCacheSync(stopCh, tc.tfJobInformerSynced,
-		tc.PodInformerSynced, tc.ServiceInformerSynced); !ok {
-		return fmt.Errorf("failed to wait for caches to sync")
-	}
-	log.Infof("Starting %v workers", threadiness)
-	// Launch workers to process TFJob resources.
-	for i := 0; i < threadiness; i++ {
-		go wait.Until(tc.runWorker, time.Second, stopCh)
-	}
-
-	log.Info("Started workers")
-	<-stopCh
-	log.Info("Shutting down workers")
-
-	return nil
-}
-
-// runWorker is a long-running function that will continually call the
-// processNextWorkItem function in order to read and process a message on the
-// workqueue.
-func (tc *TFController) runWorker() {
-	for tc.processNextWorkItem() {
-	}
-}
-
-// processNextWorkItem will read a single work item off the workqueue and
-// attempt to process it, by calling the syncHandler.
-func (tc *TFController) processNextWorkItem() bool {
-	obj, quit := tc.WorkQueue.Get()
-	if quit {
-		return false
-	}
-	defer tc.WorkQueue.Done(obj)
-
-	var key string
-	var ok bool
-	if key, ok = obj.(string); !ok {
-		// As the item in the workqueue is actually invalid, we call
-		// Forget here else we'd go into a loop of attempting to
-		// process a work item that is invalid.
-		tc.WorkQueue.Forget(obj)
-		utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj))
-		return true
-	}
-	logger := tflogger.LoggerForKey(key)
-
-	tfJob, err := tc.getTFJobFromKey(key)
-	if err != nil {
-		if err == errNotExists {
-			logger.Infof("TFJob has been deleted: %v", key)
-			namespace, _, keyerr := cache.SplitMetaNamespaceKey(key)
-			if keyerr == nil && len(namespace) != 0 {
-				tfJobsDeletedCount.WithLabelValues(namespace).Inc()
-			} else {
-				logger.Errorf("Invalid TFJob key %s: Namespace is missing %v", key, keyerr)
-			}
-			return true
-		}
-
-		// Log the failure to conditions.
-		logger.Errorf("Failed to get TFJob from key %s: %v", key, err)
-		if err == errFailedMarshal {
-			errMsg := fmt.Sprintf("Failed to unmarshal the object to TFJob object: %v", err)
-			tflogger.LoggerForJob(tfJob).Warn(errMsg)
-			tc.Recorder.Event(tfJob, v1.EventTypeWarning, failedMarshalTFJobReason, errMsg)
-		}
-
-		return true
-	}
-
-	// Sync TFJob to match the actual state to this desired state.
-	forget, err := tc.syncHandler(key)
-	if err == nil {
-		if forget {
-			tc.WorkQueue.Forget(key)
-		}
-		return true
-	}
-
-	utilruntime.HandleError(fmt.Errorf("error syncing tfjob: %v", err))
-	tc.WorkQueue.AddRateLimited(key)
-
-	return true
-}
-
-func (tc *TFController) enqueueTFJob(tfjob interface{}) {
-	key, err := KeyFunc(tfjob)
-	if err != nil {
-		utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfjob, err))
-		return
-	}
-
-	// TODO: we may need add backoff here
-	tc.WorkQueue.Add(key)
-}
-
-// syncTFJob syncs the tfjob with the given key if it has had its expectations fulfilled, meaning
-// it did not expect to see any more of its pods/services created or deleted.
-// This function is not meant to be invoked concurrently with the same key.
-func (tc *TFController) syncTFJob(key string) (bool, error) {
-	startTime := time.Now()
-	logger := tflogger.LoggerForKey(key)
-	defer func() {
-		logger.Infof("Finished syncing tfjob %q (%v)", key, time.Since(startTime))
-	}()
-
-	namespace, name, err := cache.SplitMetaNamespaceKey(key)
-	if err != nil {
-		return false, err
-	}
-	if len(namespace) == 0 || len(name) == 0 {
-		return false, fmt.Errorf("invalid tfjob key %q: either namespace or name is missing", key)
-	}
-
-	sharedTFJob, err := tc.getTFJobFromName(namespace, name)
-	if err != nil {
-		if err == errNotExists {
-			logger.Infof("TFJob has been deleted: %v", key)
-			tfJobsDeletedCount.WithLabelValues(namespace).Inc()
-			return true, nil
-		}
-		return false, err
-	}
-
-	tfjob := sharedTFJob.DeepCopy()
-
-	// Sync tfjob every time if EnableDynamicWorker is true
-	jobKey, err := common.KeyFunc(tfjob)
-	if err != nil {
-		utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", tfjob, err))
-	}
-
-	replicaTypes := util.GetReplicaTypes(tfjob.Spec.TFReplicaSpecs)
-	tfjobNeedsSync := tfjob.Spec.EnableDynamicWorker || util.SatisfiedExpectations(tc.Expectations, jobKey, replicaTypes)
-
-	// Set default for the new tfjob.
-	scheme.Scheme.Default(tfjob)
-
-	var reconcileTFJobsErr error
-	if tfjobNeedsSync && tfjob.DeletionTimestamp == nil {
-		reconcileTFJobsErr = tc.ReconcileJobs(tfjob, tfjob.Spec.TFReplicaSpecs, tfjob.Status, &tfjob.Spec.RunPolicy)
-	}
-
-	if reconcileTFJobsErr != nil {
-		return false, reconcileTFJobsErr
-	}
-
-	return true, err
-}
-
-func (tc *TFController) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) {
-	return tc.getTFJobFromName(namespace, name)
-}
-
-func (tc *TFController) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) {
-	return tc.tfJobClientSet.KubeflowV1().TFJobs(namespace).Get(context.TODO(), name, metav1.GetOptions{})
-}
-
-func (tc *TFController) GetAPIGroupVersionKind() schema.GroupVersionKind {
-	return tfv1.GroupVersion.WithKind(tfv1.Kind)
-}
-
-func (tc *TFController) GetAPIGroupVersion() schema.GroupVersion {
-	return tfv1.GroupVersion
-}
-
-func (tc *TFController) GetGroupNameLabelKey() string {
-	return labelGroupName
-}
-
-// Deprecated function for backwards compatibility. Has to be removed later
-func (tc *TFController) GetJobNameLabelKey() string {
-	return labelTFJobName
-}
-
-func (tc *TFController) GetGroupNameLabelValue() string {
-	return tfv1.GroupVersion.Group
-}
-
-func (tc *TFController) GetReplicaTypeLabelKey() string {
-	return tfReplicaTypeLabel
-}
-
-func (tc *TFController) GetReplicaIndexLabelKey() string {
-	return tfReplicaIndexLabel
-}
-
-func (tc *TFController) ControllerName() string {
-	return controllerName
-}
-
-func (tc *TFController) GetDefaultContainerName() string {
-	return tfv1.DefaultContainerName
-}
-
-func (tc *TFController) GetDefaultContainerPortName() string {
-	return tfv1.DefaultPortName
-}
-
-func (tc *TFController) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool {
-
-	if ContainChieforMasterSpec(replicas) {
-		return rtype == tfv1.TFReplicaTypeChief || rtype == tfv1.TFReplicaTypeMaster
-	}
-	// else check if it is worker with index 0
-	return rtype == tfv1.TFReplicaTypeWorker && index == 0
-}
diff --git a/pkg/controller.v1/tensorflow/controller_test.go b/pkg/controller.v1/tensorflow/controller_test.go
deleted file mode 100644
index 45763fb751..0000000000
--- a/pkg/controller.v1/tensorflow/controller_test.go
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package controller provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"testing"
-	"time"
-
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	kubeinformers "k8s.io/client-go/informers"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	batchv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/control"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/testutil"
-)
-
-var (
-	tfJobRunning   = commonv1.JobRunning
-	tfJobSucceeded = commonv1.JobSucceeded
-)
-
-func newTFController(
-	config *rest.Config,
-	kubeClientSet kubeclientset.Interface,
-	volcanoClientSet volcanoclient.Interface,
-	tfJobClientSet tfjobclientset.Interface,
-	duration time.Duration,
-	option options.ServerOption,
-) (
-	*TFController,
-	kubeinformers.SharedInformerFactory, tfjobinformers.SharedInformerFactory,
-) {
-	kubeInformerFactory := kubeinformers.NewSharedInformerFactory(kubeClientSet, duration)
-	tfJobInformerFactory := tfjobinformers.NewSharedInformerFactory(tfJobClientSet, duration)
-
-	tfJobInformer := NewUnstructuredTFJobInformer(config, metav1.NamespaceAll, time.Hour*12)
-
-	ctr := NewTFController(tfJobInformer, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, kubeInformerFactory,
-		tfJobInformerFactory, option)
-	ctr.PodControl = &control.FakePodControl{}
-	ctr.ServiceControl = &control.FakeServiceControl{}
-	return ctr, kubeInformerFactory, tfJobInformerFactory
-}
-
-func TestNormalPath(t *testing.T) {
-	testCases := map[string]struct {
-		worker int
-		ps     int
-
-		// pod setup
-		// ControllerError error
-		// jobKeyForget    bool
-
-		pendingWorkerPods   int32
-		activeWorkerPods    int32
-		succeededWorkerPods int32
-		failedWorkerPods    int32
-
-		pendingPSPods   int32
-		activePSPods    int32
-		succeededPSPods int32
-		failedPSPods    int32
-
-		activeWorkerServices int32
-		activePSServices     int32
-
-		// expectations
-		expectedPodCreations     int32
-		expectedPodDeletions     int32
-		expectedServiceCreations int32
-
-		expectedActiveWorkerPods    int32
-		expectedSucceededWorkerPods int32
-		expectedFailedWorkerPods    int32
-
-		expectedActivePSPods    int32
-		expectedSucceededPSPods int32
-		expectedFailedPSPods    int32
-
-		expectedCondition       *commonv1.JobConditionType
-		expectedConditionReason string
-
-		// There are some cases that should not check start time since the field should be set in the previous sync loop.
-		needCheckStartTime bool
-	}{
-		"Local TFJob is created": {
-			1, 0,
-			0, 0, 0, 0,
-			0, 0, 0, 0,
-			0, 0,
-			1, 0, 1,
-			0, 0, 0,
-			0, 0, 0,
-			// We can not check if it is created since the condition is set in addTFJob.
-			nil, "",
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created": {
-			4, 2,
-			0, 0, 0, 0,
-			0, 0, 0, 0,
-			0, 0,
-			6, 0, 6,
-			0, 0, 0,
-			0, 0, 0,
-			nil, "",
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created and all replicas are pending": {
-			4, 2,
-			4, 0, 0, 0,
-			2, 0, 0, 0,
-			4, 2,
-			0, 0, 0,
-			0, 0, 0,
-			0, 0, 0,
-			nil, "",
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created and all replicas are running": {
-			4, 2,
-			0, 4, 0, 0,
-			0, 2, 0, 0,
-			4, 2,
-			0, 0, 0,
-			4, 0, 0,
-			2, 0, 0,
-			&tfJobRunning, tfJobRunningReason,
-			true,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending": {
-			4, 2,
-			2, 0, 0, 0,
-			1, 0, 0, 0,
-			2, 1,
-			3, 0, 3,
-			0, 0, 0,
-			0, 0, 0,
-			nil, "",
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending, 1 worker is running": {
-			4, 2,
-			2, 1, 0, 0,
-			1, 0, 0, 0,
-			3, 1,
-			2, 0, 2,
-			1, 0, 0,
-			0, 0, 0,
-			&tfJobRunning, tfJobRunningReason,
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending, 1 worker is succeeded": {
-			4, 2,
-			2, 0, 1, 0,
-			1, 0, 0, 0,
-			3, 1,
-			2, 0, 2,
-			0, 1, 0,
-			0, 0, 0,
-			nil, "",
-			false,
-		},
-		"Distributed TFJob (4 workers, 2 PS) is succeeded": {
-			4, 2,
-			0, 0, 4, 0,
-			0, 0, 2, 0,
-			4, 2,
-			0, 0, 0,
-			0, 4, 0,
-			0, 2, 0,
-			&tfJobSucceeded, tfJobSucceededReason,
-			false,
-		},
-	}
-
-	for name, tc := range testCases {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		option := options.ServerOption{}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet, volcanoClientSet, tfJobClientSet, 0, option)
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-		// Run the test logic.
-		tfJob := testutil.NewTFJob(tc.worker, tc.ps)
-		unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-		if err != nil {
-			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-		}
-
-		if err := tfJobIndexer.Add(unstructured); err != nil {
-			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-		}
-
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-		testutil.SetPodsStatuses(podIndexer, tfJob, testutil.LabelWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, nil, t)
-		testutil.SetPodsStatuses(podIndexer, tfJob, testutil.LabelPS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, nil, t)
-
-		serviceIndexer := kubeInformerFactory.Core().V1().Services().Informer().GetIndexer()
-		testutil.SetServices(serviceIndexer, tfJob, testutil.LabelWorker, tc.activeWorkerServices, t)
-		testutil.SetServices(serviceIndexer, tfJob, testutil.LabelPS, tc.activePSServices, t)
-
-		//_, err = ctr.syncTFJob(testutil.GetKey(tfJob, t))
-		_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-
-		fakePodControl := ctr.PodControl.(*control.FakePodControl)
-		fakeServiceControl := ctr.ServiceControl.(*control.FakeServiceControl)
-		if int32(len(fakePodControl.Templates)) != tc.expectedPodCreations {
-			t.Errorf("%s: unexpected number of pod creates.  Expected %d, saw %d\n", name, tc.expectedPodCreations, len(fakePodControl.Templates))
-		}
-		if int32(len(fakeServiceControl.Templates)) != tc.expectedServiceCreations {
-			t.Errorf("%s: unexpected number of service creates.  Expected %d, saw %d\n", name, tc.expectedServiceCreations, len(fakeServiceControl.Templates))
-		}
-		if int32(len(fakePodControl.DeletePodName)) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of pod deletes.  Expected %d, saw %d\n", name, tc.expectedPodDeletions, len(fakePodControl.DeletePodName))
-		}
-		// Each create should have an accompanying ControllerRef.
-		if len(fakePodControl.ControllerRefs) != int(tc.expectedPodCreations) {
-			t.Errorf("%s: unexpected number of ControllerRefs.  Expected %d, saw %d\n", name, tc.expectedPodCreations, len(fakePodControl.ControllerRefs))
-		}
-		// Make sure the ControllerRefs are correct.
-		for _, controllerRef := range fakePodControl.ControllerRefs {
-			if got, want := controllerRef.APIVersion, tfv1.GroupVersion.String(); got != want {
-				t.Errorf("controllerRef.APIVersion = %q, want %q", got, want)
-			}
-			if got, want := controllerRef.Kind, tfv1.Kind; got != want {
-				t.Errorf("controllerRef.Kind = %q, want %q", got, want)
-			}
-			if got, want := controllerRef.Name, tfJob.Name; got != want {
-				t.Errorf("controllerRef.Name = %q, want %q", got, want)
-			}
-			if got, want := controllerRef.UID, tfJob.UID; got != want {
-				t.Errorf("controllerRef.UID = %q, want %q", got, want)
-			}
-			if controllerRef.Controller == nil || !*controllerRef.Controller {
-				t.Errorf("controllerRef.Controller is not set to true")
-			}
-		}
-		// Validate worker status.
-		if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)] != nil {
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Active != tc.expectedActiveWorkerPods {
-				t.Errorf("%s: unexpected number of active pods.  Expected %d, saw %d\n",
-					name, tc.expectedActiveWorkerPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Active)
-			}
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Succeeded != tc.expectedSucceededWorkerPods {
-				t.Errorf("%s: unexpected number of succeeded pods.  Expected %d, saw %d\n",
-					name, tc.expectedSucceededWorkerPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Succeeded)
-			}
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Failed != tc.expectedFailedWorkerPods {
-				t.Errorf("%s: unexpected number of failed pods.  Expected %d, saw %d\n",
-					name, tc.expectedFailedWorkerPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Failed)
-			}
-		}
-		// Validate PS status.
-		if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)] != nil {
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Active != tc.expectedActivePSPods {
-				t.Errorf("%s: unexpected number of active pods.  Expected %d, saw %d\n",
-					name, tc.expectedActivePSPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Active)
-			}
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Succeeded != tc.expectedSucceededPSPods {
-				t.Errorf("%s: unexpected number of succeeded pods.  Expected %d, saw %d\n",
-					name, tc.expectedSucceededPSPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Succeeded)
-			}
-			if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Failed != tc.expectedFailedPSPods {
-				t.Errorf("%s: unexpected number of failed pods.  Expected %d, saw %d\n",
-					name, tc.expectedFailedPSPods,
-					tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypePS)].Failed)
-			}
-		}
-		// Validate StartTime.
-		if tc.needCheckStartTime && tfJob.Status.StartTime == nil {
-			t.Errorf("%s: StartTime was not set", name)
-		}
-		// Validate conditions.
-		if tc.expectedCondition != nil && !testutil.CheckCondition(tfJob, *tc.expectedCondition, tc.expectedConditionReason) {
-			t.Errorf("%s: expected condition %#v, got %#v", name, *tc.expectedCondition, tfJob.Status.Conditions)
-		}
-	}
-}
-
-func TestRun(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-
-	stopCh := make(chan struct{})
-	go func() {
-		// It is a hack to let the controller stop to run without errors.
-		// We can not just send a struct to stopCh because there are multiple
-		// receivers in controller.Run.
-		time.Sleep(testutil.SleepInterval)
-		stopCh <- struct{}{}
-	}()
-	err := ctr.Run(testutil.ThreadCount, stopCh)
-	if err != nil {
-		t.Errorf("Failed to run: %v", err)
-	}
-}
diff --git a/pkg/controller.v1/tensorflow/informer.go b/pkg/controller.v1/tensorflow/informer.go
deleted file mode 100644
index 03e8207709..0000000000
--- a/pkg/controller.v1/tensorflow/informer.go
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright 2021 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License
-
-package tensorflow
-
-import (
-	"fmt"
-	"time"
-
-	log "github.com/sirupsen/logrus"
-	metav1unstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/client-go/dynamic"
-	restclientset "k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/cache"
-
-	tflogger "github.com/kubeflow/common/pkg/util"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	"github.com/kubeflow/tf-operator/pkg/apis/tensorflow/validation"
-	tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
-	tfjobinformersv1 "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/tensorflow/v1"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/unstructured"
-)
-
-const (
-	failedMarshalMsg = "Failed to marshal the object to TFJob: %v"
-)
-
-var (
-	errGetFromKey    = fmt.Errorf("failed to get TFJob from key")
-	errNotExists     = fmt.Errorf("the object is not found")
-	errFailedMarshal = fmt.Errorf("failed to marshal the object to TFJob")
-)
-
-func NewUnstructuredTFJobInformer(restConfig *restclientset.Config, namespace string, resyncPeriod time.Duration) tfjobinformersv1.TFJobInformer {
-	dclient, err := dynamic.NewForConfig(restConfig)
-	if err != nil {
-		panic(err)
-	}
-
-	resource := schema.GroupVersionResource{
-		Group:    tfv1.GroupVersion.Group,
-		Version:  tfv1.GroupVersion.Version,
-		Resource: "tfjobs",
-	}
-
-	informer := unstructured.NewTFJobInformer(
-		resource,
-		dclient,
-		namespace,
-		resyncPeriod,
-		cache.Indexers{},
-	)
-	return informer
-}
-
-// NewTFJobInformer returns TFJobInformer from the given factory.
-func (tc *TFController) NewTFJobInformer(tfJobInformerFactory tfjobinformers.SharedInformerFactory) tfjobinformersv1.TFJobInformer {
-	return tfJobInformerFactory.Kubeflow().V1().TFJobs()
-}
-
-func (tc *TFController) getTFJobFromName(namespace, name string) (*tfv1.TFJob, error) {
-	key := fmt.Sprintf("%s/%s", namespace, name)
-	return tc.getTFJobFromKey(key)
-}
-
-func (tc *TFController) getTFJobFromKey(key string) (*tfv1.TFJob, error) {
-	// Check if the key exists.
-	obj, exists, err := tc.tfJobInformer.GetIndexer().GetByKey(key)
-	logger := tflogger.LoggerForKey(key)
-	if err != nil {
-		logger.Errorf("Failed to get TFJob '%s' from informer index: %+v", key, err)
-		return nil, errGetFromKey
-	}
-	if !exists {
-		// This happens after a tfjob was deleted, but the work queue still had an entry for it.
-		return nil, errNotExists
-	}
-
-	return tfJobFromUnstructured(obj)
-}
-
-func tfJobFromUnstructured(obj interface{}) (*tfv1.TFJob, error) {
-	// Check if the spec is valid.
-	un, ok := obj.(*metav1unstructured.Unstructured)
-	if !ok {
-		log.Errorf("The object in index is not an unstructured; %+v", obj)
-		return nil, errGetFromKey
-	}
-	var tfjob tfv1.TFJob
-	err := runtime.DefaultUnstructuredConverter.FromUnstructured(un.Object, &tfjob)
-	logger := tflogger.LoggerForUnstructured(un, tfv1.Kind)
-	if err != nil {
-		logger.Errorf(failedMarshalMsg, err)
-		return nil, errFailedMarshal
-	}
-	// This is a simple validation for TFJob to close
-	// https://github.com/kubeflow/tf-operator/issues/641
-	// TODO(gaocegege): Add more validation here.
-	err = validation.ValidateV1TFJobSpec(&tfjob.Spec)
-	if err != nil {
-		logger.Errorf(failedMarshalMsg, err)
-		return nil, errFailedMarshal
-	}
-	return &tfjob, nil
-}
-
-func unstructuredFromTFJob(obj interface{}, tfJob *tfv1.TFJob) error {
-	un, ok := obj.(*metav1unstructured.Unstructured)
-	logger := tflogger.LoggerForJob(tfJob)
-	if !ok {
-		logger.Warn("The object in index isn't type Unstructured")
-		return errGetFromKey
-	}
-
-	var err error
-	un.Object, err = runtime.DefaultUnstructuredConverter.ToUnstructured(tfJob)
-	if err != nil {
-		logger.Error("The TFJob convert failed")
-		return err
-	}
-	return nil
-
-}
diff --git a/pkg/controller.v1/tensorflow/job.go b/pkg/controller.v1/tensorflow/job.go
deleted file mode 100644
index f1bbee0e0f..0000000000
--- a/pkg/controller.v1/tensorflow/job.go
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright 2021 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License
-
-package tensorflow
-
-import (
-	"context"
-	"fmt"
-	"time"
-
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-	log "github.com/sirupsen/logrus"
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	metav1unstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
-	"k8s.io/client-go/kubernetes/scheme"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	commonutil "github.com/kubeflow/common/pkg/util"
-	"github.com/kubeflow/common/pkg/util/k8sutil"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-)
-
-const (
-	failedMarshalTFJobReason  = "InvalidTFJobSpec"
-	FailedDeleteJobReason     = "FailedDeleteJob"
-	SuccessfulDeleteJobReason = "SuccessfulDeleteJob"
-)
-
-var (
-	tfJobsCreatedCount = promauto.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "tf_operator_jobs_created_total",
-			Help: "Counts number of TF jobs created",
-		},
-		[]string{"job_namespace"},
-	)
-)
-
-// DeleteJob implements ControllerInterface interface.
-func (tc *TFController) DeleteJob(job interface{}) error {
-	tfJob, ok := job.(*tfv1.TFJob)
-	if !ok {
-		return fmt.Errorf("%v is not a type of TFJob", tfJob)
-	}
-
-	log := commonutil.LoggerForJob(tfJob)
-	if err := tc.tfJobClientSet.KubeflowV1().TFJobs(tfJob.Namespace).Delete(context.TODO(), tfJob.Name, metav1.DeleteOptions{}); err != nil {
-		tc.JobController.Recorder.Eventf(tfJob, v1.EventTypeWarning, FailedDeleteJobReason, "Error deleting: %v", err)
-		log.Errorf("failed to delete job %s/%s, %v", tfJob.Namespace, tfJob.Name, err)
-		return err
-	}
-
-	tc.JobController.Recorder.Eventf(tfJob, v1.EventTypeNormal, SuccessfulDeleteJobReason, "Deleted job: %v", tfJob.Name)
-	log.Infof("job %s/%s has been deleted", tfJob.Namespace, tfJob.Name)
-	return nil
-}
-
-// addTFJob sets the defaults and enqueue the current tfjob.
-func (tc *TFController) addTFJob(obj interface{}) {
-	// Convert from unstructured object.
-	tfJob, err := tfJobFromUnstructured(obj)
-	if err != nil {
-		un, ok := obj.(*metav1unstructured.Unstructured)
-		logger := &log.Entry{}
-		if ok {
-			logger = commonutil.LoggerForUnstructured(un, tfv1.Kind)
-		}
-		logger.Errorf("Failed to convert the TFJob: %v", err)
-		// Log the failure to conditions.
-		if err == errFailedMarshal {
-			errMsg := fmt.Sprintf("Failed to marshal the object to TFJob; the spec is invalid: %v", err)
-			logger.Warn(errMsg)
-			// TODO(jlewi): v1 doesn't appear to define an error type.
-			tc.Recorder.Event(un, v1.EventTypeWarning, failedMarshalTFJobReason, errMsg)
-
-			status := commonv1.JobStatus{
-				Conditions: []commonv1.JobCondition{
-					{
-						Type:               commonv1.JobFailed,
-						Status:             v1.ConditionTrue,
-						LastUpdateTime:     metav1.Now(),
-						LastTransitionTime: metav1.Now(),
-						Reason:             failedMarshalTFJobReason,
-						Message:            errMsg,
-					},
-				},
-			}
-
-			statusMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&status)
-
-			if err != nil {
-				logger.Errorf("Could not covert the TFJobStatus to unstructured; %v", err)
-				return
-			}
-
-			client, err := k8sutil.NewCRDRestClient(&tfv1.GroupVersion)
-
-			if err == nil {
-				if err1 := metav1unstructured.SetNestedField(un.Object, statusMap, "status"); err1 != nil {
-					logger.Errorf("Could not set nested field: %v", err1)
-				}
-				logger.Infof("Updating the job to: %+v", un.Object)
-				err = client.UpdateStatus(un, tfv1.Plural)
-				if err != nil {
-					logger.Errorf("Could not update the TFJob: %v", err)
-				}
-			} else {
-				logger.Errorf("Could not create a REST client to update the TFJob")
-			}
-		}
-		return
-	}
-
-	// Set default for the new tfjob.
-	// TODO(Jeffwan): Consider to change to scheme https://github.com/kubeflow/tf-operator/issues/1317#issuecomment-890397705
-	tfv1.SetDefaults_TFJob(tfJob)
-	scheme.Scheme.Default(tfJob)
-
-	msg := fmt.Sprintf("TFJob %s is created.", tfJob.Name)
-	logger := commonutil.LoggerForJob(tfJob)
-	logger.Info(msg)
-
-	// Add a created condition.
-	err = commonutil.UpdateJobConditions(&tfJob.Status, commonv1.JobCreated, tfJobCreatedReason, msg)
-	if err != nil {
-		logger.Errorf("Append tfJob condition error: %v", err)
-		return
-	}
-
-	// Convert from tfjob object
-	err = unstructuredFromTFJob(obj, tfJob)
-	if err != nil {
-		logger.Errorf("Failed to convert the obj: %v", err)
-		return
-	}
-	tc.enqueueTFJob(obj)
-	tfJobsCreatedCount.WithLabelValues(tfJob.Namespace).Inc()
-}
-
-// updateTFJob enqueues the current tfjob.
-func (tc *TFController) updateTFJob(old, cur interface{}) {
-	oldTFJob, err := tfJobFromUnstructured(old)
-	if err != nil {
-		return
-	}
-	curTFJob, err := tfJobFromUnstructured(cur)
-	if err != nil {
-		return
-	}
-
-	// never return error
-	key, err := KeyFunc(curTFJob)
-	if err != nil {
-		return
-	}
-
-	log.Infof("Updating tfjob: %s", oldTFJob.Name)
-	tc.enqueueTFJob(cur)
-
-	// check if need to add a new rsync for ActiveDeadlineSeconds
-	if curTFJob.Status.StartTime != nil {
-		curTFJobADS := curTFJob.Spec.RunPolicy.ActiveDeadlineSeconds
-		if curTFJobADS == nil {
-			return
-		}
-		oldTFJobADS := oldTFJob.Spec.RunPolicy.ActiveDeadlineSeconds
-		if oldTFJobADS == nil || *oldTFJobADS != *curTFJobADS {
-			now := metav1.Now()
-			start := curTFJob.Status.StartTime.Time
-			passed := now.Time.Sub(start)
-			total := time.Duration(*curTFJobADS) * time.Second
-			// AddAfter will handle total < passed
-			tc.WorkQueue.AddAfter(key, total-passed)
-			log.Infof("job ActiveDeadlineSeconds updated, will rsync after %d seconds", total-passed)
-		}
-	}
-}
diff --git a/pkg/controller.v1/tensorflow/job_test.go b/pkg/controller.v1/tensorflow/job_test.go
deleted file mode 100644
index f5b0df4e6d..0000000000
--- a/pkg/controller.v1/tensorflow/job_test.go
+++ /dev/null
@@ -1,810 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tensorflow
-
-import (
-	"testing"
-	"time"
-
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/record"
-	batchv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	common "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/control"
-	commonutil "github.com/kubeflow/common/pkg/util"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/testutil"
-)
-
-func TestAddTFJob(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0,
-		options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	var key string
-	syncChan := make(chan string)
-	ctr.syncHandler = func(tfJobKey string) (bool, error) {
-		key = tfJobKey
-		<-syncChan
-		return true, nil
-	}
-
-	tfJob := testutil.NewTFJob(1, 0)
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-	ctr.addTFJob(unstructured)
-
-	syncChan <- "sync"
-	if key != testutil.GetKey(tfJob, t) {
-		t.Errorf("Failed to enqueue the TFJob %s: expected %s, got %s", tfJob.Name, testutil.GetKey(tfJob, t), key)
-	}
-	close(stopCh)
-}
-
-func TestCopyLabelsAndAnnotation(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	fakePodControl := &control.FakePodControl{}
-	ctr.PodControl = fakePodControl
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	tfJob := testutil.NewTFJob(1, 0)
-	annotations := map[string]string{
-		"annotation1": "1",
-	}
-	labels := map[string]string{
-		"label1": "1",
-	}
-	tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].Template.Labels = labels
-	tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].Template.Annotations = annotations
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-
-	_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-
-	if len(fakePodControl.Templates) != 1 {
-		t.Errorf("Expected to create 1 pod while got %d", len(fakePodControl.Templates))
-	}
-	actual := fakePodControl.Templates[0]
-	v, exist := actual.Labels["label1"]
-	if !exist {
-		t.Errorf("Labels does not exist")
-	}
-	if v != "1" {
-		t.Errorf("Labels value do not equal")
-	}
-
-	v, exist = actual.Annotations["annotation1"]
-	if !exist {
-		t.Errorf("Annotations does not exist")
-	}
-	if v != "1" {
-		t.Errorf("Annotations value does not equal")
-	}
-
-	close(stopCh)
-}
-
-func TestDeletePodsAndServices(t *testing.T) {
-	type testCase struct {
-		description string
-		tfJob       *tfv1.TFJob
-
-		pendingWorkerPods   int32
-		activeWorkerPods    int32
-		succeededWorkerPods int32
-		failedWorkerPods    int32
-
-		pendingPSPods   int32
-		activePSPods    int32
-		succeededPSPods int32
-		failedPSPods    int32
-
-		activeWorkerServices int32
-		activePSServices     int32
-
-		expectedPodDeletions int
-	}
-
-	testCases := []testCase{
-		testCase{
-			description: "4 workers and 2 ps is running, policy is all",
-			tfJob:       testutil.NewTFJobWithCleanPolicy(0, 4, 2, common.CleanPodPolicyAll),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    4,
-			succeededWorkerPods: 0,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    2,
-			succeededPSPods: 0,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 6,
-		},
-		testCase{
-			description: "4 workers and 2 ps is running, policy is running",
-			tfJob:       testutil.NewTFJobWithCleanPolicy(0, 4, 2, common.CleanPodPolicyRunning),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    4,
-			succeededWorkerPods: 0,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    2,
-			succeededPSPods: 0,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 6,
-		},
-		testCase{
-			description: "4 workers and 2 ps is succeeded, policy is running",
-			tfJob:       testutil.NewTFJobWithCleanPolicy(0, 4, 2, common.CleanPodPolicyRunning),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    0,
-			succeededWorkerPods: 4,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    0,
-			succeededPSPods: 2,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 0,
-		},
-		testCase{
-			description: "4 workers and 2 ps is succeeded, policy is None",
-			tfJob:       testutil.NewTFJobWithCleanPolicy(0, 4, 2, common.CleanPodPolicyNone),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    0,
-			succeededWorkerPods: 4,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    0,
-			succeededPSPods: 2,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 0,
-		},
-	}
-	for _, tc := range testCases {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-			volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-		fakePodControl := &control.FakePodControl{}
-		ctr.PodControl = fakePodControl
-		fakeServiceControl := &control.FakeServiceControl{}
-		ctr.ServiceControl = fakeServiceControl
-		ctr.Recorder = &record.FakeRecorder{}
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-		// Set succeeded to run the logic about deleting.
-		err := commonutil.UpdateJobConditions(&tc.tfJob.Status, common.JobSucceeded, tfJobSucceededReason, "")
-		if err != nil {
-			t.Errorf("Append tfjob condition error: %v", err)
-		}
-
-		unstructured, err := testutil.ConvertTFJobToUnstructured(tc.tfJob)
-		if err != nil {
-			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-		}
-
-		if err := tfJobIndexer.Add(unstructured); err != nil {
-			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-		}
-
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, nil, t)
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelPS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, nil, t)
-
-		serviceIndexer := kubeInformerFactory.Core().V1().Services().Informer().GetIndexer()
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelWorker, tc.activeWorkerServices, t)
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelPS, tc.activePSServices, t)
-
-		_ = ctr.ReconcileJobs(tc.tfJob, tc.tfJob.Spec.TFReplicaSpecs, tc.tfJob.Status, &tc.tfJob.Spec.RunPolicy)
-		// forget, err := ctr.syncTFJob(testutil.GetKey(tc.tfJob, t))
-		// if err != nil {
-		// 	t.Errorf("%s: unexpected error when syncing jobs %v", tc.description, err)
-		// }
-		// if !forget {
-		// 	t.Errorf("%s: unexpected forget value. Expected true, saw %v\n", tc.description, forget)
-		// }
-
-		if len(fakePodControl.DeletePodName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of pod deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakePodControl.DeletePodName))
-		}
-		if len(fakeServiceControl.DeleteServiceName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of service deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakeServiceControl.DeleteServiceName))
-		}
-	}
-}
-
-// TODO(ChanYiLin): I have to remove this test since I can't overwrite the deleteTFJobHandler() function
-// It is now in common library as part of controller interface - DeleteJob()
-// func TestCleanupTFJob(t *testing.T) {
-// 	type testCase struct {
-// 		description string
-// 		tfJob       *tfv1.TFJob
-
-// 		pendingWorkerPods   int32
-// 		activeWorkerPods    int32
-// 		succeededWorkerPods int32
-// 		failedWorkerPods    int32
-
-// 		pendingPSPods   int32
-// 		activePSPods    int32
-// 		succeededPSPods int32
-// 		failedPSPods    int32
-
-// 		activeWorkerServices int32
-// 		activePSServices     int32
-
-// 		expectedDeleteFinished bool
-// 	}
-
-// 	ttlaf0 := int32(0)
-// 	ttl0 := &ttlaf0
-// 	ttlaf2s := int32(2)
-// 	ttl2s := &ttlaf2s
-// 	testCases := []testCase{
-// 		testCase{
-// 			description: "4 workers and 2 ps is running, TTLSecondsAfterFinished unset",
-// 			tfJob:       testutil.NewTFJobWithCleanupJobDelay(0, 4, 2, nil),
-
-// 			pendingWorkerPods:   0,
-// 			activeWorkerPods:    4,
-// 			succeededWorkerPods: 0,
-// 			failedWorkerPods:    0,
-
-// 			pendingPSPods:   0,
-// 			activePSPods:    2,
-// 			succeededPSPods: 0,
-// 			failedPSPods:    0,
-
-// 			activeWorkerServices: 4,
-// 			activePSServices:     2,
-
-// 			expectedDeleteFinished: false,
-// 		},
-// 		testCase{
-// 			description: "4 workers and 2 ps is running, TTLSecondsAfterFinished is 0",
-// 			tfJob:       testutil.NewTFJobWithCleanupJobDelay(0, 4, 2, ttl0),
-
-// 			pendingWorkerPods:   0,
-// 			activeWorkerPods:    4,
-// 			succeededWorkerPods: 0,
-// 			failedWorkerPods:    0,
-
-// 			pendingPSPods:   0,
-// 			activePSPods:    2,
-// 			succeededPSPods: 0,
-// 			failedPSPods:    0,
-
-// 			activeWorkerServices: 4,
-// 			activePSServices:     2,
-
-// 			expectedDeleteFinished: true,
-// 		},
-// 		testCase{
-// 			description: "4 workers and 2 ps is succeeded, TTLSecondsAfterFinished is 2",
-// 			tfJob:       testutil.NewTFJobWithCleanupJobDelay(0, 4, 2, ttl2s),
-
-// 			pendingWorkerPods:   0,
-// 			activeWorkerPods:    0,
-// 			succeededWorkerPods: 4,
-// 			failedWorkerPods:    0,
-
-// 			pendingPSPods:   0,
-// 			activePSPods:    0,
-// 			succeededPSPods: 2,
-// 			failedPSPods:    0,
-
-// 			activeWorkerServices: 4,
-// 			activePSServices:     2,
-
-// 			expectedDeleteFinished: true,
-// 		},
-// 	}
-// 	for _, tc := range testCases {
-// 		// Prepare the clientset and controller for the test.
-// 		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-// 			Host: "",
-// 			ContentConfig: rest.ContentConfig{
-// 				GroupVersion: &v1.SchemeGroupVersion,
-// 			},
-// 		},
-// 		)
-
-// 		// Prepare the volcano clientset and controller for the test.
-// 		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-// 			Host: "",
-// 			ContentConfig: rest.ContentConfig{
-// 				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-// 			},
-// 		},
-// 		)
-
-// 		config := &rest.Config{
-// 			Host: "",
-// 			ContentConfig: rest.ContentConfig{
-// 				GroupVersion: &tfv1.SchemeGroupVersion,
-// 			},
-// 		}
-// 		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-// 		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet, volcanoClientSet, tfJobClientSet, controller.NoResyncPeriodFunc, options.ServerOption{})
-// 		fakePodControl := &controller.FakePodControl{}
-// 		ctr.PodControl = fakePodControl
-// 		fakeServiceControl := &control.FakeServiceControl{}
-// 		ctr.ServiceControl = fakeServiceControl
-// 		ctr.Recorder = &record.FakeRecorder{}
-// 		ctr.tfJobInformerSynced = testutil.AlwaysReady
-// 		ctr.PodInformerSynced = testutil.AlwaysReady
-// 		ctr.ServiceInformerSynced = testutil.AlwaysReady
-// 		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-// 		ctr.updateStatusHandler = func(job interface{}, jobStatus *commonv1.JobStatus) error {
-// 			return nil
-// 		}
-// 		deleteFinished := false
-// 		ctr.deleteTFJobHandler = func(tfJob *tfv1.TFJob) error {
-// 			deleteFinished = true
-// 			return nil
-// 		}
-
-// 		// Set succeeded to run the logic about deleting.
-// 		testutil.SetTFJobCompletionTime(tc.tfJob)
-// 		err := commonutil.UpdateJobConditions(&tc.tfJob.Status, common.JobSucceeded, tfJobSucceededReason, "")
-// 		if err != nil {
-// 			t.Errorf("Append tfjob condition error: %v", err)
-// 		}
-
-// 		unstructured, err := testutil.ConvertTFJobToUnstructured(tc.tfJob)
-// 		if err != nil {
-// 			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-// 		}
-
-// 		if err := tfJobIndexer.Add(unstructured); err != nil {
-// 			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-// 		}
-
-// 		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-// 		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, nil, t)
-// 		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelPS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, nil, t)
-
-// 		serviceIndexer := kubeInformerFactory.Core().V1().Services().Informer().GetIndexer()
-// 		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelWorker, tc.activeWorkerServices, t)
-// 		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelPS, tc.activePSServices, t)
-
-// 		ttl := tc.tfJob.Spec.RunPolicy.TTLSecondsAfterFinished
-// 		if ttl != nil {
-// 			dur := time.Second * time.Duration(*ttl)
-// 			time.Sleep(dur)
-// 		}
-
-// 		//forget, err := ctr.syncTFJob(testutil.GetKey(tc.tfJob, t))
-// 		_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-// 		ctr.DeleteJob = func(job interface{}) error {
-// 			deleteFinished = true
-// 			return nil
-// 		}
-// 		// if err != nil {
-// 		// 	t.Errorf("%s: unexpected error when syncing jobs %v", tc.description, err)
-// 		// }
-// 		// if !forget {
-// 		// 	t.Errorf("%s: unexpected forget value. Expected true, saw %v\n", tc.description, forget)
-// 		// }
-
-// 		if deleteFinished != tc.expectedDeleteFinished {
-// 			t.Errorf("%s: unexpected status. Expected %v, saw %v", tc.description, tc.expectedDeleteFinished, deleteFinished)
-// 		}
-// 	}
-// }
-
-func TestActiveDeadlineSeconds(t *testing.T) {
-	type testCase struct {
-		description string
-		tfJob       *tfv1.TFJob
-
-		pendingWorkerPods   int32
-		activeWorkerPods    int32
-		succeededWorkerPods int32
-		failedWorkerPods    int32
-
-		pendingPSPods   int32
-		activePSPods    int32
-		succeededPSPods int32
-		failedPSPods    int32
-
-		activeWorkerServices int32
-		activePSServices     int32
-
-		expectedPodDeletions int
-	}
-
-	ads2 := int64(2)
-	adsTest2 := &ads2
-	testCases := []testCase{
-		testCase{
-			description: "4 workers and 2 ps is running, ActiveDeadlineSeconds unset",
-			tfJob:       testutil.NewTFJobWithActiveDeadlineSeconds(0, 4, 2, nil),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    4,
-			succeededWorkerPods: 0,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    2,
-			succeededPSPods: 0,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 0,
-		},
-		testCase{
-			description: "4 workers and 2 ps is running, ActiveDeadlineSeconds is 2",
-			tfJob:       testutil.NewTFJobWithActiveDeadlineSeconds(0, 4, 2, adsTest2),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    4,
-			succeededWorkerPods: 0,
-			failedWorkerPods:    0,
-
-			pendingPSPods:   0,
-			activePSPods:    2,
-			succeededPSPods: 0,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 6,
-		},
-	}
-	for _, tc := range testCases {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-			volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-		fakePodControl := &control.FakePodControl{}
-		ctr.PodControl = fakePodControl
-		fakeServiceControl := &control.FakeServiceControl{}
-		ctr.ServiceControl = fakeServiceControl
-		ctr.Recorder = &record.FakeRecorder{}
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-		unstructured, err := testutil.ConvertTFJobToUnstructured(tc.tfJob)
-		if err != nil {
-			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-		}
-
-		if err := tfJobIndexer.Add(unstructured); err != nil {
-			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-		}
-
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, nil, t)
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelPS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, nil, t)
-
-		serviceIndexer := kubeInformerFactory.Core().V1().Services().Informer().GetIndexer()
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelWorker, tc.activeWorkerServices, t)
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelPS, tc.activePSServices, t)
-
-		foo, _ := ctr.getTFJobFromName("default", "test-tfjob")
-		now := metav1.Now()
-		foo.Status.StartTime = &now
-
-		ads := tc.tfJob.Spec.RunPolicy.ActiveDeadlineSeconds
-		if ads != nil {
-			dur := time.Second * time.Duration(*ads)
-			time.Sleep(dur)
-		}
-
-		_ = ctr.ReconcileJobs(foo, foo.Spec.TFReplicaSpecs, foo.Status, &foo.Spec.RunPolicy)
-		// if err != nil {
-		// 	t.Errorf("%s: unexpected error when syncing jobs %v", tc.description, err)
-		// }
-
-		if len(fakePodControl.DeletePodName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of pod deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakePodControl.DeletePodName))
-		}
-		if len(fakeServiceControl.DeleteServiceName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of service deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakeServiceControl.DeleteServiceName))
-		}
-	}
-}
-
-func TestBackoffForOnFailure(t *testing.T) {
-	type testCase struct {
-		description string
-		tfJob       *tfv1.TFJob
-
-		pendingWorkerPods   int32
-		activeWorkerPods    int32
-		succeededWorkerPods int32
-		failedWorkerPods    int32
-
-		restartCounts []int32
-
-		pendingPSPods   int32
-		activePSPods    int32
-		succeededPSPods int32
-		failedPSPods    int32
-
-		activeWorkerServices int32
-		activePSServices     int32
-
-		expectedPodDeletions int
-	}
-
-	backoffLimit4 := int32(4)
-	backoffLimitTest4 := &backoffLimit4
-	testCases := []testCase{
-		testCase{
-			description: "4 workers each having 1 restartCount and 2 ps is running, backoffLimit 4 ",
-			tfJob:       testutil.NewTFJobWithBackoffLimit(0, 4, 2, backoffLimitTest4),
-
-			pendingWorkerPods:   0,
-			activeWorkerPods:    4,
-			succeededWorkerPods: 0,
-			failedWorkerPods:    0,
-
-			restartCounts: []int32{1, 1, 1, 1},
-
-			pendingPSPods:   0,
-			activePSPods:    2,
-			succeededPSPods: 0,
-			failedPSPods:    0,
-
-			activeWorkerServices: 4,
-			activePSServices:     2,
-
-			expectedPodDeletions: 6,
-		},
-	}
-	for _, tc := range testCases {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-			volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-		fakePodControl := &control.FakePodControl{}
-		ctr.PodControl = fakePodControl
-		fakeServiceControl := &control.FakeServiceControl{}
-		ctr.ServiceControl = fakeServiceControl
-		ctr.Recorder = &record.FakeRecorder{}
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-		unstructured, err := testutil.ConvertTFJobToUnstructured(tc.tfJob)
-		if err != nil {
-			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-		}
-
-		if err := tfJobIndexer.Add(unstructured); err != nil {
-			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-		}
-
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, tc.restartCounts, t)
-		testutil.SetPodsStatuses(podIndexer, tc.tfJob, testutil.LabelPS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, tc.restartCounts, t)
-
-		serviceIndexer := kubeInformerFactory.Core().V1().Services().Informer().GetIndexer()
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelWorker, tc.activeWorkerServices, t)
-		testutil.SetServices(serviceIndexer, tc.tfJob, testutil.LabelPS, tc.activePSServices, t)
-
-		_ = ctr.ReconcileJobs(tc.tfJob, tc.tfJob.Spec.TFReplicaSpecs, tc.tfJob.Status, &tc.tfJob.Spec.RunPolicy)
-		// forget, err := ctr.syncTFJob(testutil.GetKey(tc.tfJob, t))
-		// if err != nil {
-		// 	t.Errorf("%s: unexpected error when syncing jobs %v", tc.description, err)
-		// }
-		// if !forget {
-		// 	t.Errorf("%s: unexpected forget value. Expected true, saw %v\n", tc.description, forget)
-		// }
-
-		if len(fakePodControl.DeletePodName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of pod deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakePodControl.DeletePodName))
-		}
-		if len(fakeServiceControl.DeleteServiceName) != tc.expectedPodDeletions {
-			t.Errorf("%s: unexpected number of service deletes.  Expected %d, saw %d\n", tc.description, tc.expectedPodDeletions, len(fakeServiceControl.DeleteServiceName))
-		}
-	}
-}
diff --git a/pkg/controller.v1/tensorflow/pod.go b/pkg/controller.v1/tensorflow/pod.go
deleted file mode 100644
index 37b9bdb729..0000000000
--- a/pkg/controller.v1/tensorflow/pod.go
+++ /dev/null
@@ -1,381 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package controller provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"fmt"
-	"strconv"
-	"strings"
-
-	"github.com/kubeflow/tf-operator/pkg/common/util"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-
-	corev1 "k8s.io/api/core/v1"
-	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/common"
-	"github.com/kubeflow/common/pkg/controller.v1/expectation"
-	commonutil "github.com/kubeflow/common/pkg/util"
-	train_util "github.com/kubeflow/common/pkg/util/train"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-)
-
-const (
-	// gang scheduler name.
-	gangSchedulerName = "volcano"
-	// tfConfig is the environment variable name of TensorFlow cluster spec.
-	tfConfig = "TF_CONFIG"
-	// exitedWithCodeReason is the normal reason when the pod is exited because of the exit code.
-	exitedWithCodeReason = "ExitedWithCode"
-	// podTemplateRestartPolicyReason is the warning reason when the restart
-	// policy is set in pod template.
-	podTemplateRestartPolicyReason = "SettedPodTemplateRestartPolicy"
-	// podTemplateSchedulerNameReason is the warning reason when other scheduler name is set
-	// in pod templates with gang-scheduling enabled
-	podTemplateSchedulerNameReason = "SettedPodTemplateSchedulerName"
-	// gangSchedulingPodGroupAnnotation is the annotation key used by batch schedulers
-	gangSchedulingPodGroupAnnotation = "scheduling.k8s.io/group-name"
-)
-
-var (
-	tfJobsRestartCount = promauto.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "tf_operator_jobs_restarted_total",
-			Help: "Counts number of TF jobs restarted",
-		},
-		[]string{"job_namespace"},
-	)
-)
-
-// reconcilePods checks and updates pods for each given TFReplicaSpec.
-// It will requeue the tfjob in case of an error while creating/deleting pods.
-func (tc *TFController) ReconcilePods(
-	job interface{},
-	jobStatus *commonv1.JobStatus,
-	pods []*v1.Pod,
-	rtype commonv1.ReplicaType,
-	spec *commonv1.ReplicaSpec,
-	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
-) error {
-
-	tfJob, ok := job.(*tfv1.TFJob)
-	if !ok {
-		return fmt.Errorf("%v is not a type of TFJob", tfJob)
-	}
-
-	// Convert ReplicaType to lower string.
-	rt := strings.ToLower(string(rtype))
-	logger := commonutil.LoggerForJob(tfJob)
-	// Get all pods for the type rt.
-	pods, err := tc.FilterPodsForReplicaType(pods, rt)
-	if err != nil {
-		return err
-	}
-	numReplicas := int(*spec.Replicas)
-	masterRole := false
-	//restart := false
-	//worker0Completed := false
-
-	initializeReplicaStatuses(jobStatus, rtype)
-
-	// GetPodSlices will return enough information here to make decision to add/remove/update resources.
-	//
-	// For example, let's assume we have pods with replica-index 0, 1, 2
-	// If replica is 4, return a slice with size 4. [[0],[1],[2],[]], a pod with replica-index 3 will be created.
-	//
-	// If replica is 1, return a slice with size 3. [[0],[1],[2]], pod with replica-index 1 and 2 are out of range and will be deleted.
-	podSlices := tc.GetPodSlices(pods, numReplicas, logger)
-	for index, podSlice := range podSlices {
-		if len(podSlice) > 1 {
-			logger.Warningf("We have too many pods for %s %d", rt, index)
-		} else if len(podSlice) == 0 {
-			logger.Infof("Need to create new pod: %s-%d", rt, index)
-
-			// check if this replica is the master role
-			masterRole = tc.IsMasterRole(replicas, rtype, index)
-			// TODO: [should change to CreateNewPod]
-			err = tc.createNewPod(tfJob, rt, strconv.Itoa(index), spec, masterRole, replicas)
-			if err != nil {
-				return err
-			}
-		} else {
-			// Check the status of the current pod.
-			pod := podSlice[0]
-
-			// check if the index is in the valid range, if not, we should kill the pod
-			if index < 0 || index >= numReplicas {
-				err = tc.PodControl.DeletePod(pod.Namespace, pod.Name, tfJob)
-				if err != nil {
-					return err
-				}
-			}
-			// Get the exit code of the container.
-			var exitCode int32 = 0xbeef // magic number
-			for _, status := range pod.Status.ContainerStatuses {
-				state := status.State
-				if status.Name == tc.GetDefaultContainerName() && state.Terminated != nil {
-					exitCode = state.Terminated.ExitCode
-					logger.Infof("Pod: %v.%v exited with code %v", pod.Namespace, pod.Name, exitCode)
-					tc.Recorder.Eventf(tfJob, v1.EventTypeNormal, exitedWithCodeReason, "Pod: %v.%v exited with code %v", pod.Namespace, pod.Name, exitCode)
-				}
-			}
-			// Check if the pod is retryable.
-			if spec.RestartPolicy == commonv1.RestartPolicyExitCode {
-				if pod.Status.Phase == v1.PodFailed && train_util.IsRetryableExitCode(exitCode) {
-					logger.Infof("Need to restart the pod: %v.%v", pod.Namespace, pod.Name)
-					if err := tc.PodControl.DeletePod(pod.Namespace, pod.Name, tfJob); err != nil {
-						return err
-					}
-
-					// with common library framework, we have to handle restart status here
-					// or we won't know which replica has been restarted in updateJobStatus after reconciling all replicas
-					msg := fmt.Sprintf("TFJob %s is restarting because %s replica(s) failed.",
-						tfJob.Name, rtype)
-					tc.Recorder.Event(tfJob, corev1.EventTypeWarning, tfJobRestartingReason, msg)
-					err := commonutil.UpdateJobConditions(jobStatus, commonv1.JobRestarting, tfJobRestartingReason, msg)
-					if err != nil {
-						commonutil.LoggerForJob(tfJob).Infof("Append tfjob condition error: %v", err)
-						return err
-					}
-					tfJobsRestartCount.WithLabelValues(tfJob.Namespace).Inc()
-				}
-			}
-
-			updateJobReplicaStatuses(jobStatus, rtype, pod)
-		}
-	}
-	return nil
-}
-
-// createNewPod creates a new pod for the given index and type.
-func (tc *TFController) createNewPod(tfjob *tfv1.TFJob, rt, index string, spec *commonv1.ReplicaSpec, masterRole bool,
-	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error {
-
-	tfjobKey, err := KeyFunc(tfjob)
-	if err != nil {
-		utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfjob, err))
-		return err
-	}
-	expectationPodsKey := expectation.GenExpectationPodsKey(tfjobKey, rt)
-	err = tc.Expectations.ExpectCreations(expectationPodsKey, 1)
-	if err != nil {
-		return err
-	}
-	logger := commonutil.LoggerForReplica(tfjob, rt)
-	// Create OwnerReference.
-	controllerRef := tc.GenOwnerReference(tfjob)
-
-	// Set type and index for the worker.
-	labels := tc.GenLabels(tfjob.Name)
-	labels[tfReplicaTypeLabel] = rt
-	labels[tfReplicaIndexLabel] = index
-
-	if masterRole {
-		labels[commonv1.JobRoleLabel] = "master"
-	}
-
-	podTemplate := spec.Template.DeepCopy()
-
-	// Set name for the template.
-	podTemplate.Name = common.GenGeneralName(tfjob.Name, rt, index)
-
-	if podTemplate.Labels == nil {
-		podTemplate.Labels = make(map[string]string)
-	}
-
-	for key, value := range labels {
-		podTemplate.Labels[key] = value
-	}
-
-	if err := tc.SetClusterSpec(tfjob, podTemplate, rt, index); err != nil {
-		return err
-	}
-
-	// Submit a warning event if the user specifies restart policy for
-	// the pod template. We recommend to set it from the replica level.
-	if podTemplate.Spec.RestartPolicy != v1.RestartPolicy("") {
-		errMsg := "Restart policy in pod template will be overwritten by restart policy in replica spec"
-		logger.Warning(errMsg)
-		tc.Recorder.Event(tfjob, v1.EventTypeWarning, podTemplateRestartPolicyReason, errMsg)
-	}
-	setRestartPolicy(podTemplate, spec)
-
-	// if gang-scheduling is enabled:
-	// 1. if user has specified other scheduler, we report a warning without overriding any fields.
-	// 2. if no SchedulerName is set for pods, then we set the SchedulerName to "kube-batch".
-	if tc.Config.EnableGangScheduling {
-		if util.IsGangSchedulerSet(replicas, gangSchedulerName) {
-			errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten"
-			logger.Warning(errMsg)
-			tc.Recorder.Event(tfjob, v1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg)
-		} else {
-			podTemplate.Spec.SchedulerName = gangSchedulerName
-		}
-
-		if podTemplate.Annotations == nil {
-			podTemplate.Annotations = map[string]string{}
-		}
-		podTemplate.Annotations[gangSchedulingPodGroupAnnotation] = tfjob.GetName()
-		podTemplate.Annotations[volcanoTaskSpecKey] = rt
-	}
-
-	err = tc.PodControl.CreatePodsWithControllerRef(tfjob.Namespace, podTemplate, tfjob, controllerRef)
-	if err != nil && errors.IsTimeout(err) {
-		// Pod is created but its initialization has timed out.
-		// If the initialization is successful eventually, the
-		// controller will observe the creation via the informer.
-		// If the initialization fails, or if the pod keeps
-		// uninitialized for a long time, the informer will not
-		// receive any update, and the controller will create a new
-		// pod when the expectation expires.
-		return nil
-	} else if err != nil {
-		// Decrement the expected number of creates because the informer won't observe this pod
-		logger.Infof(
-			"Failed creation, decrementing expectations for tfjob %s/%s, key %s",
-			tfjob.Namespace, tfjob.Name, expectationPodsKey)
-		tc.Expectations.CreationObserved(expectationPodsKey)
-		return err
-	}
-	return nil
-}
-
-// SetClusterSpec generates and sets TF_CONFIG for the given podTemplateSpec.
-func (tc *TFController) SetClusterSpec(job interface{}, podTemplate *v1.PodTemplateSpec, rtype, index string) error {
-	tfjob, ok := job.(*tfv1.TFJob)
-	if !ok {
-		return fmt.Errorf("%v is not a type of MXJob", tfjob)
-	}
-
-	// Do not set TF_CONFIG for local training jobs.
-	if !isDistributed(tfjob) {
-		return nil
-	}
-	// Generate TF_CONFIG JSON string.
-	tfConfigStr, err := genTFConfigJSONStr(tfjob, rtype, index)
-	if err != nil {
-		return err
-	}
-
-	if tfConfigStr == "" {
-		return nil
-	}
-	// Add TF_CONFIG environment variable to tensorflow container in the pod.
-	for i := range podTemplate.Spec.Containers {
-		if podTemplate.Spec.Containers[i].Name == tfv1.DefaultContainerName {
-			if len(podTemplate.Spec.Containers[i].Env) == 0 {
-				podTemplate.Spec.Containers[i].Env = make([]v1.EnvVar, 0)
-			}
-			podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, v1.EnvVar{
-				Name:  tfConfig,
-				Value: tfConfigStr,
-			})
-			break
-		}
-	}
-	return nil
-}
-
-// isDistributed returns if the TFJob is a distributed training job.
-// Ref https://github.com/kubeflow/tf-operator/issues/1078.
-func isDistributed(tfjob *tfv1.TFJob) bool {
-	replicas := tfjob.Spec.TFReplicaSpecs
-	distributionCount := 0
-	allTypes := []commonv1.ReplicaType{
-		tfv1.TFReplicaTypeChief,
-		tfv1.TFReplicaTypeEval,
-		tfv1.TFReplicaTypeMaster,
-		tfv1.TFReplicaTypePS,
-		tfv1.TFReplicaTypeWorker,
-	}
-	// Check if there is only one replica.
-	for _, typ := range allTypes {
-		if replicas[typ] != nil {
-			if replicas[typ].Replicas == nil {
-				distributionCount++
-			} else {
-				distributionCount += int(*replicas[typ].Replicas)
-			}
-		}
-	}
-	return distributionCount != 1
-}
-
-func setRestartPolicy(podTemplateSpec *v1.PodTemplateSpec, spec *commonv1.ReplicaSpec) {
-	// This is necessary since restartPolicyExitCode is not supported in v1.PodTemplateSpec
-	if spec.RestartPolicy == commonv1.RestartPolicyExitCode {
-		podTemplateSpec.Spec.RestartPolicy = v1.RestartPolicyNever
-	} else {
-		podTemplateSpec.Spec.RestartPolicy = v1.RestartPolicy(spec.RestartPolicy)
-	}
-}
-
-func (tc *TFController) getPodSlices(tfjob *tfv1.TFJob, replicasNum *int32) ([][]*v1.Pod, error) {
-	logger := commonutil.LoggerForReplica(tfjob, strings.ToLower(string(tfv1.TFReplicaTypeWorker)))
-
-	pods, err := tc.Controller.GetPodsForJob(tfjob)
-	if err != nil {
-		commonutil.LoggerForJob(tfjob).Warnf("getPodsForTFJob error %v", err)
-		return nil, err
-	}
-
-	// Get all pods for the type rt.
-	pods, err = tc.FilterPodsForReplicaType(pods, strings.ToLower(string(tfv1.TFReplicaTypeWorker)))
-	if err != nil {
-		return nil, err
-	}
-
-	podSlices := tc.GetPodSlices(pods, int(*replicasNum), logger)
-	return podSlices, nil
-}
-
-func getContainerExitCode(pod *v1.Pod) int32 {
-	var exitCode int32 = 0xbeef // magic number
-	for _, status := range pod.Status.ContainerStatuses {
-		state := status.State
-		if status.Name == tfv1.DefaultContainerName && state.Terminated != nil {
-			exitCode = state.Terminated.ExitCode
-		}
-	}
-	return exitCode
-}
-
-// IsWorker0Completed return true if pod of worker0 succeeded and exited with 0
-func (tc *TFController) IsWorker0Completed(tfjob *tfv1.TFJob, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) (bool, error) {
-	worker0Completed := false
-	_, ok := replicas[tfv1.TFReplicaTypeWorker]
-	if !ok {
-		return true, nil
-	}
-	podSlices, err := tc.getPodSlices(tfjob, replicas[tfv1.TFReplicaTypeWorker].Replicas)
-	if err != nil {
-		return false, err
-	}
-	for index, podSlice := range podSlices {
-		if len(podSlice) == 1 {
-			pod := podSlice[0]
-			exitCode := getContainerExitCode(pod)
-			if index == 0 && exitCode == 0 && pod.Status.Phase == v1.PodSucceeded {
-				worker0Completed = true
-			}
-		}
-	}
-	return worker0Completed, nil
-}
diff --git a/pkg/controller.v1/tensorflow/pod_test.go b/pkg/controller.v1/tensorflow/pod_test.go
deleted file mode 100644
index 6c3c67d8d7..0000000000
--- a/pkg/controller.v1/tensorflow/pod_test.go
+++ /dev/null
@@ -1,823 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package controller provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"fmt"
-	"os"
-	"reflect"
-	"testing"
-
-	v1 "k8s.io/api/core/v1"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/record"
-	batchv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/common"
-	"github.com/kubeflow/common/pkg/controller.v1/control"
-	"github.com/kubeflow/common/pkg/controller.v1/expectation"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/testutil"
-)
-
-func TestAddPod(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	var key string
-	syncChan := make(chan string)
-	ctr.syncHandler = func(tfJobKey string) (bool, error) {
-		key = tfJobKey
-		<-syncChan
-		return true, nil
-	}
-
-	tfJob := testutil.NewTFJob(1, 0)
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-	pod := testutil.NewPod(tfJob, testutil.LabelWorker, 0)
-	ctr.AddPod(pod)
-
-	syncChan <- "sync"
-	if key != testutil.GetKey(tfJob, t) {
-		t.Errorf("Failed to enqueue the TFJob %s: expected %s, got %s", tfJob.Name, testutil.GetKey(tfJob, t), key)
-	}
-	close(stopCh)
-}
-
-func TestExpectation(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-
-	ctr.PodControl = &control.FakePodControl{}
-	tfJob := testutil.NewTFJob(2, 1)
-
-	var err error
-	if err = ctr.createNewPod(tfJob, "worker", "0",
-		tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker],
-		false, tfJob.Spec.TFReplicaSpecs); err != nil {
-		t.Errorf("Expected get nil, got error %v", err)
-	}
-
-	tfjobKey, err := KeyFunc(tfJob)
-	if err != nil {
-		t.Errorf("Expected nil, got error %v", err)
-	}
-	expectationPodsKey := expectation.GenExpectationPodsKey(tfjobKey, "worker")
-	e, found, err := ctr.Expectations.GetExpectations(expectationPodsKey)
-	if err != nil {
-		t.Errorf("Expected nil, got error %v", err)
-	}
-	if !found {
-		t.Errorf("Expected to get the corresponding expectation")
-	}
-	if add, del := e.GetExpectations(); add != 1 || del != 0 {
-		t.Errorf("Expected get 1 add and 0 del, got %d add and %d del", add, del)
-	}
-}
-
-func TestExpectationWithError(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-
-	ctr.PodControl = &control.FakePodControl{}
-	tfJob := testutil.NewTFJob(2, 1)
-
-	// Fake an error.
-	ctr.PodControl.(*control.FakePodControl).Err = fmt.Errorf("Fake")
-
-	var err error
-	if err = ctr.createNewPod(tfJob, "worker", "0",
-		tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker],
-		false, tfJob.Spec.TFReplicaSpecs); err == nil {
-		t.Errorf("Expected error, got nil")
-	}
-
-	tfjobKey, err := KeyFunc(tfJob)
-	if err != nil {
-		t.Errorf("Expected nil, got error %v", err)
-	}
-	expectationPodsKey := expectation.GenExpectationPodsKey(tfjobKey, "worker")
-	e, found, err := ctr.Expectations.GetExpectations(expectationPodsKey)
-	if err != nil {
-		t.Errorf("Expected nil, got error %v", err)
-	}
-	if !found {
-		t.Errorf("Expected to get the corresponding expectation")
-	}
-	if add, del := e.GetExpectations(); add != 0 || del != 0 {
-		t.Errorf("Expected get 0 add and 0 del, got %d add and %d del", add, del)
-	}
-}
-
-func TestClusterSpec(t *testing.T) {
-	type tc struct {
-		tfJob               *tfv1.TFJob
-		rt                  string
-		index               string
-		customClusterDomain string
-		expectedClusterSpec string
-	}
-	testCase := []tc{
-		tc{
-			tfJob:               testutil.NewTFJobWithNamespace(1, 0, "ns0"),
-			rt:                  "worker",
-			index:               "0",
-			customClusterDomain: "",
-			expectedClusterSpec: "",
-		},
-		tc{
-			tfJob:               testutil.NewTFJobWithNamespace(1, 0, "ns1"),
-			rt:                  "worker",
-			index:               "0",
-			customClusterDomain: "tf.training.com",
-			expectedClusterSpec: "",
-		},
-		tc{
-			tfJob:               testutil.NewTFJobWithNamespace(1, 1, "ns2"),
-			rt:                  "worker",
-			index:               "0",
-			customClusterDomain: "tf.training.org",
-			expectedClusterSpec: `{"cluster":{"ps":["` + testutil.TestTFJobName +
-				`-ps-0.ns2.svc.tf.training.org:2222"],"worker":["` + testutil.TestTFJobName +
-				`-worker-0.ns2.svc.tf.training.org:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`,
-		},
-		tc{
-			tfJob:               testutil.NewTFJobWithEvaluatorAndNamespace(1, 1, 1, "ns3"),
-			rt:                  "worker",
-			index:               "0",
-			customClusterDomain: "tf.training.io",
-			expectedClusterSpec: `{"cluster":{"evaluator":["` + testutil.TestTFJobName +
-				`-evaluator-0.ns3.svc.tf.training.io:2222"],"ps":["` + testutil.TestTFJobName +
-				`-ps-0.ns3.svc.tf.training.io:2222"],"worker":["` + testutil.TestTFJobName +
-				`-worker-0.ns3.svc.tf.training.io:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`,
-		},
-		tc{
-			tfJob:               testutil.NewTFJobWithEvaluatorAndNamespace(1, 1, 1, "ns3"),
-			rt:                  "worker",
-			index:               "0",
-			customClusterDomain: "",
-			expectedClusterSpec: `{"cluster":{"evaluator":["` + testutil.TestTFJobName +
-				`-evaluator-0.ns3.svc:2222"],"ps":["` + testutil.TestTFJobName +
-				`-ps-0.ns3.svc:2222"],"worker":["` + testutil.TestTFJobName +
-				`-worker-0.ns3.svc:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`,
-		},
-	}
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-
-	for _, c := range testCase {
-		os.Setenv(EnvCustomClusterDomain, c.customClusterDomain)
-
-		podTemplate := c.tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].Template.DeepCopy()
-
-		// Set name for the template.
-		podTemplate.Name = common.GenGeneralName(c.tfJob.GetName(), c.rt, c.index)
-
-		if podTemplate.Labels == nil {
-			podTemplate.Labels = make(map[string]string)
-		}
-
-		// Set type and index for the worker.
-		labels := ctr.GenLabels(c.tfJob.GetName())
-		labels[commonv1.ReplicaTypeLabel] = c.rt
-		labels[commonv1.ReplicaIndexLabel] = c.index
-
-		for key, value := range labels {
-			podTemplate.Labels[key] = value
-		}
-
-		if err := ctr.SetClusterSpec(c.tfJob, podTemplate, c.rt, c.index); err != nil {
-			t.Errorf("Failed to set cluster spec: %v", err)
-		}
-		// The expected cluster spec is nil, which means that we should not set TF_CONFIG.
-		if c.expectedClusterSpec == "" {
-			if len(podTemplate.Spec.Containers[0].Env) != 0 {
-				t.Errorf("Expected empty TF_CONFIG, got %s",
-					podTemplate.Spec.Containers[0].Env[0].Value)
-			}
-		} else {
-			actual := podTemplate.Spec.Containers[0].Env[0].Value
-			if c.expectedClusterSpec != actual {
-				t.Errorf("Expected %s, got %s", c.expectedClusterSpec, actual)
-			}
-		}
-	}
-}
-
-func TestIsDistributed(t *testing.T) {
-	type tc struct {
-		tfJob    *tfv1.TFJob
-		expected bool
-	}
-	testCase := []tc{
-		{
-			tfJob:    testutil.NewTFJob(1, 0),
-			expected: false,
-		},
-		{
-			tfJob:    testutil.NewTFJob(1, 1),
-			expected: true,
-		},
-		{
-			tfJob:    testutil.NewTFJob(0, 1),
-			expected: false,
-		},
-		{
-			tfJob:    testutil.NewTFJobWithChief(1, 0),
-			expected: true,
-		},
-	}
-	for _, c := range testCase {
-		actual := isDistributed(c.tfJob)
-		if actual != c.expected {
-			t.Errorf("Expected %t, got %t", c.expected, actual)
-		}
-	}
-}
-
-func TestRestartPolicy(t *testing.T) {
-	type tc struct {
-		tfJob                 *tfv1.TFJob
-		expectedRestartPolicy v1.RestartPolicy
-		expectedType          commonv1.ReplicaType
-	}
-	testCase := []tc{
-		func() tc {
-			tfJob := testutil.NewTFJob(1, 0)
-			specRestartPolicy := commonv1.RestartPolicyExitCode
-			tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].RestartPolicy = specRestartPolicy
-			return tc{
-				tfJob:                 tfJob,
-				expectedRestartPolicy: v1.RestartPolicyNever,
-				expectedType:          tfv1.TFReplicaTypeWorker,
-			}
-		}(),
-		func() tc {
-			tfJob := testutil.NewTFJob(1, 0)
-			specRestartPolicy := commonv1.RestartPolicyNever
-			tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].RestartPolicy = specRestartPolicy
-			return tc{
-				tfJob:                 tfJob,
-				expectedRestartPolicy: v1.RestartPolicyNever,
-				expectedType:          tfv1.TFReplicaTypeWorker,
-			}
-		}(),
-		func() tc {
-			tfJob := testutil.NewTFJob(1, 0)
-			specRestartPolicy := commonv1.RestartPolicyAlways
-			tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].RestartPolicy = specRestartPolicy
-			return tc{
-				tfJob:                 tfJob,
-				expectedRestartPolicy: v1.RestartPolicyAlways,
-				expectedType:          tfv1.TFReplicaTypeWorker,
-			}
-		}(),
-		func() tc {
-			tfJob := testutil.NewTFJob(1, 0)
-			specRestartPolicy := commonv1.RestartPolicyOnFailure
-			tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].RestartPolicy = specRestartPolicy
-			return tc{
-				tfJob:                 tfJob,
-				expectedRestartPolicy: v1.RestartPolicyOnFailure,
-				expectedType:          tfv1.TFReplicaTypeWorker,
-			}
-		}(),
-	}
-	for _, c := range testCase {
-		spec := c.tfJob.Spec.TFReplicaSpecs[c.expectedType]
-		podTemplate := spec.Template
-		setRestartPolicy(&podTemplate, spec)
-		if podTemplate.Spec.RestartPolicy != c.expectedRestartPolicy {
-			t.Errorf("Expected %s, got %s", c.expectedRestartPolicy, podTemplate.Spec.RestartPolicy)
-		}
-	}
-}
-
-func TestExitCode(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	fakePodControl := &control.FakePodControl{}
-	ctr.PodControl = fakePodControl
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-	podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	tfJob := testutil.NewTFJob(1, 0)
-	tfJob.Spec.TFReplicaSpecs[tfv1.TFReplicaTypeWorker].RestartPolicy = commonv1.RestartPolicyExitCode
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-	pod := testutil.NewPod(tfJob, testutil.LabelWorker, 0)
-	pod.Status.Phase = v1.PodFailed
-	pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{})
-	pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, v1.ContainerStatus{
-		Name: tfv1.DefaultContainerName,
-		State: v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{
-				ExitCode: 130,
-			},
-		},
-	})
-
-	if err := podIndexer.Add(pod); err != nil {
-		t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-	}
-	_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-	// _, err = ctr.syncTFJob(testutil.GetKey(tfJob, t))
-	// if err != nil {
-	// 	t.Errorf("%s: unexpected error when syncing jobs %v", tfJob.Name, err)
-	// }
-
-	found := false
-	for _, deletedPodName := range fakePodControl.DeletePodName {
-		if deletedPodName == pod.Name {
-			found = true
-		}
-	}
-	if !found {
-		t.Errorf("Failed to delete pod %s", pod.Name)
-	}
-	close(stopCh)
-}
-
-// Test scaling down number of workers while training is running
-func TestScaleDown(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	fakePodControl := &control.FakePodControl{}
-	ctr.PodControl = fakePodControl
-	ctr.Recorder = &record.FakeRecorder{}
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-	podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	tfJob := testutil.NewTFJob(2, 0)
-	tfJob.SelfLink = "/api/v1/namespaces/default/tfjob/test-tfjob"
-	tfJob.Spec.EnableDynamicWorker = true
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-	pod0 := testutil.NewPod(tfJob, testutil.LabelWorker, 0)
-	pod1 := testutil.NewPod(tfJob, testutil.LabelWorker, 1)
-	pod2 := testutil.NewPod(tfJob, testutil.LabelWorker, 2)
-
-	if err := podIndexer.Add(pod0); err != nil {
-		t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-	}
-	if err := podIndexer.Add(pod1); err != nil {
-		t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-	}
-	if err := podIndexer.Add(pod2); err != nil {
-		t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-	}
-
-	_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-	// _, err = ctr.syncTFJob(testutil.GetKey(tfJob, t))
-	// if err != nil {
-	// 	t.Errorf("%s: unexpected error when syncing jobs %v", tfJob.Name, err)
-	// }
-
-	expectedDeletePods := []string{"worker-2"}
-	if !reflect.DeepEqual(expectedDeletePods, fakePodControl.DeletePodName) {
-		t.Errorf("Scale down workers test failed")
-	}
-	close(stopCh)
-}
-
-// Test scaling up number of workers while training is running
-func TestScaleUp(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	fakePodControl := &control.FakePodControl{}
-	ctr.PodControl = fakePodControl
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-	tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-	podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-
-	stopCh := make(chan struct{})
-	run := func(<-chan struct{}) {
-		if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-			t.Errorf("Failed to run the controller: %v", err)
-		}
-	}
-	go run(stopCh)
-
-	tfJob := testutil.NewTFJob(3, 0)
-	tfJob.Spec.EnableDynamicWorker = true
-	unstructured, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-	}
-
-	if err := tfJobIndexer.Add(unstructured); err != nil {
-		t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-	}
-	pod0 := testutil.NewPod(tfJob, testutil.LabelWorker, 0)
-
-	if err := podIndexer.Add(pod0); err != nil {
-		t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-	}
-
-	_ = ctr.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy)
-	// _, err = ctr.syncTFJob(testutil.GetKey(tfJob, t))
-	// if err != nil {
-	// 	t.Errorf("%s: unexpected error when syncing jobs %v", tfJob.Name, err)
-	// }
-
-	if !(len(fakePodControl.Templates) == 2 && fakePodControl.Templates[0].Name == "test-tfjob-worker-1" && fakePodControl.Templates[1].Name == "test-tfjob-worker-2") {
-		t.Error("Scale up workers test failed")
-	}
-
-	close(stopCh)
-}
-
-func TestIsWorker0Completed(t *testing.T) {
-	newInt32 := func(in int32) *int32 {
-		return &in
-	}
-	tests := []struct {
-		// worker failed, succeeded, running num
-		workers     [3]int32
-		tfJob       *tfv1.TFJob
-		replicas    map[commonv1.ReplicaType]*commonv1.ReplicaSpec
-		expected    bool
-		expectedErr bool
-	}{
-		{
-			workers:     [3]int32{0, 0, 1},
-			tfJob:       testutil.NewTFJobV2(1, 1, 0, 0, 0),
-			expected:    false,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeWorker: {
-					Replicas: newInt32(1),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-				tfv1.TFReplicaTypePS: {
-					Replicas: newInt32(1),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-		{
-			workers:     [3]int32{0, 1, 0},
-			tfJob:       testutil.NewTFJobV2(1, 0, 0, 0, 0),
-			expected:    true,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeWorker: {
-					Replicas: newInt32(1),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-		{
-			workers:     [3]int32{0, 0, 0},
-			tfJob:       testutil.NewTFJobV2(0, 0, 1, 0, 0),
-			expected:    true,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeMaster: {
-					Replicas: newInt32(1),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-		{
-			workers:     [3]int32{0, 0, 0},
-			tfJob:       testutil.NewTFJobV2(0, 0, 0, 1, 0),
-			expected:    true,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeChief: {
-					Replicas: newInt32(1),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-		{
-			workers:     [3]int32{1, 1, 0},
-			tfJob:       testutil.NewTFJobV2(2, 0, 0, 0, 0),
-			expected:    true,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeWorker: {
-					Replicas: newInt32(2),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-		{
-			workers:     [3]int32{1, 0, 1},
-			tfJob:       testutil.NewTFJobV2(2, 0, 0, 0, 0),
-			expected:    false,
-			expectedErr: false,
-			replicas: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{
-				tfv1.TFReplicaTypeWorker: {
-					Replicas: newInt32(2),
-					Template: testutil.NewTFReplicaSpecTemplate(),
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-			volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-
-		// only related to worker status
-		initializeReplicaStatuses(&tt.tfJob.Status, tfv1.TFReplicaTypeWorker)
-		// set status and add pod to indexer
-		setStatusForTest(tt.tfJob, tfv1.TFReplicaTypeWorker, tt.workers[0], tt.workers[1], tt.workers[2], false, true, podIndexer, t)
-
-		got, err := ctr.IsWorker0Completed(tt.tfJob, tt.replicas)
-		if (err != nil) != tt.expectedErr {
-			t.Errorf("IsWorker0Completed() error = %v, wantErr %v", err, tt.expectedErr)
-			return
-		}
-		if got != tt.expected {
-			t.Errorf("IsWorker0Completed() got = %v, want %v", got, tt.expected)
-		}
-	}
-}
diff --git a/pkg/controller.v1/tensorflow/status.go b/pkg/controller.v1/tensorflow/status.go
deleted file mode 100644
index 4d078f7679..0000000000
--- a/pkg/controller.v1/tensorflow/status.go
+++ /dev/null
@@ -1,279 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package controller provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"context"
-	"fmt"
-	"time"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	commonutil "github.com/kubeflow/common/pkg/util"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-	corev1 "k8s.io/api/core/v1"
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-)
-
-const (
-	// tfJobCreatedReason is added in a tfjob when it is created.
-	tfJobCreatedReason = "TFJobCreated"
-	// tfJobSucceededReason is added in a tfjob when it is succeeded.
-	tfJobSucceededReason = "TFJobSucceeded"
-	// tfJobRunningReason is added in a tfjob when it is running.
-	tfJobRunningReason = "TFJobRunning"
-	// tfJobFailedReason is added in a tfjob when it is failed.
-	tfJobFailedReason = "TFJobFailed"
-	// tfJobRestarting is added in a tfjob when it is restarting.
-	tfJobRestartingReason = "TFJobRestarting"
-)
-
-var (
-	tfJobsSuccessCount = promauto.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "tf_operator_jobs_successful_total",
-			Help: "Counts number of TF jobs successful",
-		},
-		[]string{"job_namespace"},
-	)
-	tfJobsFailureCount = promauto.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "tf_operator_jobs_failed_total",
-			Help: "Counts number of TF jobs failed",
-		},
-		[]string{"job_namespace"},
-	)
-)
-
-func (tc *TFController) UpdateJobStatus(job interface{}, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, jobStatus *commonv1.JobStatus) error {
-	tfJob, ok := job.(*tfv1.TFJob)
-	if !ok {
-		return fmt.Errorf("%v is not a type of TFJob", tfJob)
-	}
-
-	tfJobKey, err := KeyFunc(tfJob)
-	if err != nil {
-		utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfJob, err))
-		return err
-	}
-
-	logger := commonutil.LoggerForJob(tfJob)
-
-	worker0Completed, err := tc.IsWorker0Completed(tfJob, replicas)
-	if err != nil {
-		logger.Warnf("check if worker 0 completed error %v", err)
-		return err
-	}
-
-	// Set StartTime.
-	if jobStatus.StartTime == nil {
-		now := metav1.Now()
-		jobStatus.StartTime = &now
-		// enqueue a sync to check if job past ActiveDeadlineSeconds
-		if tfJob.Spec.RunPolicy.ActiveDeadlineSeconds != nil {
-			logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *tfJob.Spec.RunPolicy.ActiveDeadlineSeconds)
-			tc.WorkQueue.AddAfter(tfJobKey, time.Duration(*tfJob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second)
-		}
-	}
-	// iterate the replica spec based on this order
-	allTypes := []commonv1.ReplicaType{
-		tfv1.TFReplicaTypeChief,
-		tfv1.TFReplicaTypeEval,
-		tfv1.TFReplicaTypeMaster,
-		tfv1.TFReplicaTypePS,
-		tfv1.TFReplicaTypeWorker,
-	}
-	for _, rtype := range allTypes {
-		if replicas[rtype] == nil {
-			continue
-		}
-		spec := replicas[rtype]
-		status := jobStatus.ReplicaStatuses[rtype]
-
-		// Expect to have `replicas - succeeded` pods alive.
-		succeeded := status.Succeeded
-		expected := *(spec.Replicas) - succeeded
-		running := status.Active
-		failed := status.Failed
-
-		logger.Infof("TFJob=%s/%s, ReplicaType=%s expected=%d, running=%d, failed=%d",
-			tfJob.Namespace, tfJob.Name, rtype, expected, running, failed)
-
-		// If the TFJob contains Chief or Master spec, then we will update the status
-		// according to the Chief/Master spec.
-		if ContainChieforMasterSpec(tfJob.Spec.TFReplicaSpecs) {
-			if tfv1.IsChieforMaster(rtype) {
-				if running > 0 {
-					msg := fmt.Sprintf("TFJob %s/%s is running.",
-						tfJob.Namespace, tfJob.Name)
-					err := commonutil.UpdateJobConditions(jobStatus,
-						commonv1.JobRunning, tfJobRunningReason, msg)
-					if err != nil {
-						commonutil.LoggerForJob(tfJob).Infof(
-							"Append tfjob condition error: %v", err)
-						return err
-					}
-				}
-				if expected == 0 {
-					msg := fmt.Sprintf("TFJob %s/%s successfully completed.",
-						tfJob.Namespace, tfJob.Name)
-					tc.Recorder.Event(tfJob, corev1.EventTypeNormal, tfJobSucceededReason, msg)
-					if jobStatus.CompletionTime == nil {
-						now := metav1.Now()
-						jobStatus.CompletionTime = &now
-					}
-					err := commonutil.UpdateJobConditions(jobStatus,
-						commonv1.JobSucceeded, tfJobSucceededReason, msg)
-					if err != nil {
-						commonutil.LoggerForJob(tfJob).Infof("Append tfjob condition error: %v", err)
-						return err
-					}
-					tfJobsSuccessCount.WithLabelValues(tfJob.Namespace).Inc()
-				}
-			}
-		} else {
-			if rtype == tfv1.TFReplicaTypeWorker {
-				// Leave a succeeded condition for the following two cases:
-				// 1. If default success policy is used and worker 0 has completed.
-				// 2. If `SuccessPolicyAllWorkers` success policy is used and all workers are succeeded.
-				if expected == 0 || (worker0Completed && *tfJob.Spec.SuccessPolicy != tfv1.SuccessPolicyAllWorkers) {
-					msg := fmt.Sprintf("TFJob %s/%s successfully completed.",
-						tfJob.Namespace, tfJob.Name)
-					tc.Recorder.Event(tfJob, corev1.EventTypeNormal, tfJobSucceededReason, msg)
-					if jobStatus.CompletionTime == nil {
-						now := metav1.Now()
-						jobStatus.CompletionTime = &now
-					}
-					err := commonutil.UpdateJobConditions(jobStatus,
-						commonv1.JobSucceeded, tfJobSucceededReason, msg)
-					if err != nil {
-						commonutil.LoggerForJob(tfJob).Infof("Append tfjob condition error: %v", err)
-						return err
-					}
-					tfJobsSuccessCount.WithLabelValues(tfJob.Namespace).Inc()
-				} else if running > 0 {
-					// Some workers are still running, leave a running condition.
-					msg := fmt.Sprintf("TFJob %s/%s is running.",
-						tfJob.Namespace, tfJob.Name)
-					err := commonutil.UpdateJobConditions(jobStatus, commonv1.JobRunning, tfJobRunningReason, msg)
-					if err != nil {
-						commonutil.LoggerForJob(tfJob).Infof("Append tfjob condition error: %v", err)
-						return err
-					}
-				}
-			}
-		}
-
-		if failed > 0 {
-			restart := false
-			for _, condition := range jobStatus.Conditions {
-				if condition.Type == commonv1.JobRestarting {
-					restart = true
-				}
-			}
-
-			if restart {
-				// job is restarting, no need to set it failed
-				// we know it because we update the status condition when reconciling the replicas
-				tfJobsFailureCount.WithLabelValues(tfJob.Namespace).Inc()
-			} else {
-				msg := fmt.Sprintf("TFJob %s/%s has failed because %d %s replica(s) failed.",
-					tfJob.Namespace, tfJob.Name, failed, rtype)
-				tc.Recorder.Event(tfJob, corev1.EventTypeNormal, tfJobFailedReason, msg)
-				if jobStatus.CompletionTime == nil {
-					now := metav1.Now()
-					jobStatus.CompletionTime = &now
-				}
-				err := commonutil.UpdateJobConditions(jobStatus,
-					commonv1.JobFailed, tfJobFailedReason, msg)
-				if err != nil {
-					commonutil.LoggerForJob(tfJob).Infof("Append tfjob condition error: %v", err)
-					return err
-				}
-				tfJobsFailureCount.WithLabelValues(tfJob.Namespace).Inc()
-			}
-		}
-	}
-	// we assign the jobStatus to the tfJob.Status for testing purpose
-	// it won't effect the main reconcile logic
-	// because we already use oldStatus := jobStatus.DeepCopy() to record the oldStatus
-	// and use !reflect.DeepEqual(*oldStatus, jobStatus) to decide whether to update the tfJob or not
-	tfJob.Status = *jobStatus.DeepCopy()
-
-	return nil
-}
-
-// UpdateJobStatusInApiServer updates the status of the given TFJob.
-func (tc *TFController) UpdateJobStatusInApiServer(job interface{}, jobStatus *commonv1.JobStatus) error {
-	tfJob, ok := job.(*tfv1.TFJob)
-	if !ok {
-		return fmt.Errorf("%v is not a type of TFJob", tfJob)
-	}
-
-	startTime := time.Now()
-	logger := commonutil.LoggerForJob(tfJob)
-	defer func() {
-		logger.Infof("Finished updating TFJobs Status %q (%v)",
-			tfJob.Name, time.Since(startTime))
-	}()
-
-	tfJob = tfJob.DeepCopy()
-	tfJob.Status = *jobStatus.DeepCopy()
-
-	_, err := tc.tfJobClientSet.KubeflowV1().TFJobs(tfJob.Namespace).UpdateStatus(context.TODO(), tfJob, metav1.UpdateOptions{})
-	return err
-}
-
-// initializeReplicaStatuses initializes the ReplicaStatuses for replica.
-func initializeReplicaStatuses(jobStatus *commonv1.JobStatus, rtype commonv1.ReplicaType) {
-	if jobStatus.ReplicaStatuses == nil {
-		jobStatus.ReplicaStatuses = make(map[commonv1.ReplicaType]*commonv1.ReplicaStatus)
-	}
-
-	jobStatus.ReplicaStatuses[rtype] = &commonv1.ReplicaStatus{}
-}
-
-// updateJobReplicaStatuses updates the JobReplicaStatuses according to the pod.
-func updateJobReplicaStatuses(jobStatus *commonv1.JobStatus, rtype commonv1.ReplicaType, pod *corev1.Pod) {
-	switch pod.Status.Phase {
-	case corev1.PodRunning:
-		jobStatus.ReplicaStatuses[rtype].Active++
-	case corev1.PodSucceeded:
-		jobStatus.ReplicaStatuses[rtype].Succeeded++
-	case corev1.PodFailed:
-		jobStatus.ReplicaStatuses[rtype].Failed++
-	}
-}
-
-func isSucceeded(status commonv1.JobStatus) bool {
-	return hasCondition(status, commonv1.JobSucceeded)
-}
-
-func isFailed(status commonv1.JobStatus) bool {
-	return hasCondition(status, commonv1.JobFailed)
-}
-
-func hasCondition(status commonv1.JobStatus, condType commonv1.JobConditionType) bool {
-	for _, condition := range status.Conditions {
-		if condition.Type == condType && condition.Status == v1.ConditionTrue {
-			return true
-		}
-	}
-	return false
-}
diff --git a/pkg/controller.v1/tensorflow/status_test.go b/pkg/controller.v1/tensorflow/status_test.go
deleted file mode 100644
index 29d2f65bce..0000000000
--- a/pkg/controller.v1/tensorflow/status_test.go
+++ /dev/null
@@ -1,592 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package controller provides a Kubernetes controller for a TFJob resource.
-package tensorflow
-
-import (
-	"fmt"
-	"testing"
-
-	corev1 "k8s.io/api/core/v1"
-	v1 "k8s.io/api/core/v1"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/cache"
-	"k8s.io/client-go/tools/record"
-
-	batchv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	"github.com/kubeflow/common/pkg/controller.v1/control"
-	"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/testutil"
-)
-
-func TestFailed(t *testing.T) {
-	// Prepare the clientset and controller for the test.
-	kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &v1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	// Prepare the volcano clientset and controller for the test.
-	volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &batchv1beta1.SchemeGroupVersion,
-		},
-	},
-	)
-
-	config := &rest.Config{
-		Host: "",
-		ContentConfig: rest.ContentConfig{
-			GroupVersion: &tfv1.GroupVersion,
-		},
-	}
-	tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-	ctr, _, _ := newTFController(config, kubeClientSet,
-		volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-	ctr.tfJobInformerSynced = testutil.AlwaysReady
-	ctr.PodInformerSynced = testutil.AlwaysReady
-	ctr.ServiceInformerSynced = testutil.AlwaysReady
-
-	tfJob := testutil.NewTFJob(3, 0)
-	initializeReplicaStatuses(&tfJob.Status, tfv1.TFReplicaTypeWorker)
-	pod := testutil.NewBasePod("pod", tfJob)
-	pod.Status.Phase = v1.PodFailed
-
-	updateJobReplicaStatuses(&tfJob.Status, tfv1.TFReplicaTypeWorker, pod)
-	if tfJob.Status.ReplicaStatuses[commonv1.ReplicaType(tfv1.TFReplicaTypeWorker)].Failed != 1 {
-		t.Errorf("Failed to set the failed to 1")
-	}
-
-	err := ctr.UpdateJobStatus(tfJob, tfJob.Spec.TFReplicaSpecs, &tfJob.Status)
-	if err != nil {
-		t.Errorf("Expected error %v to be nil", err)
-	}
-	found := false
-	for _, condition := range tfJob.Status.Conditions {
-		if condition.Type == commonv1.JobFailed {
-			found = true
-		}
-	}
-	if !found {
-		t.Errorf("Failed condition is not found")
-	}
-}
-
-func TestStatus(t *testing.T) {
-	type testCase struct {
-		description string
-		tfJob       *tfv1.TFJob
-
-		expectedFailedPS    int32
-		expectedSucceededPS int32
-		expectedActivePS    int32
-
-		expectedFailedWorker    int32
-		expectedSucceededWorker int32
-		expectedActiveWorker    int32
-
-		expectedFailedChief    int32
-		expectedSucceededChief int32
-		expectedActiveChief    int32
-
-		restart          bool
-		worker0Completed bool
-
-		expectedType commonv1.JobConditionType
-	}
-
-	testCases := []testCase{
-		testCase{
-			description:             "Chief worker is succeeded",
-			tfJob:                   testutil.NewTFJobWithChief(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 1,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  1,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobSucceeded,
-		},
-		testCase{
-			description:             "Chief worker is running",
-			tfJob:                   testutil.NewTFJobWithChief(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     1,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "Chief worker is failed",
-			tfJob:                   testutil.NewTFJobWithChief(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     1,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "(No chief worker) Worker is failed",
-			tfJob:                   testutil.NewTFJob(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    1,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "(No chief worker) Worker is succeeded",
-			tfJob:                   testutil.NewTFJob(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 1,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobSucceeded,
-		},
-		testCase{
-			description:             "(No chief worker) Worker is running",
-			tfJob:                   testutil.NewTFJob(1, 0),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    1,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "(No chief worker) 2 workers are succeeded, 2 workers are active",
-			tfJob:                   testutil.NewTFJob(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 2,
-			expectedActiveWorker:    2,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "(No chief worker) 2 workers are running, 2 workers are failed",
-			tfJob:                   testutil.NewTFJob(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    2,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    2,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "(No chief worker) 2 workers are succeeded, 2 workers are failed",
-			tfJob:                   testutil.NewTFJob(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    2,
-			expectedSucceededWorker: 2,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "(No chief worker) worker-0 are succeeded, 3 workers are active",
-			tfJob:                   testutil.NewTFJob(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 1,
-			expectedActiveWorker:    3,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        true,
-			expectedType:            commonv1.JobSucceeded,
-		},
-		testCase{
-			description:             "(No chief worker, successPolicy: AllWorkers) worker-0 are succeeded, 3 workers are active",
-			tfJob:                   testutil.NewTFJobWithSuccessPolicy(4, 0, tfv1.SuccessPolicyAllWorkers),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 1,
-			expectedActiveWorker:    3,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        true,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "(No chief worker, successPolicy: AllWorkers) 4 workers are succeeded",
-			tfJob:                   testutil.NewTFJobWithSuccessPolicy(4, 0, tfv1.SuccessPolicyAllWorkers),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 4,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        true,
-			expectedType:            commonv1.JobSucceeded,
-		},
-		testCase{
-			description:             "(No chief worker, successPolicy: AllWorkers) worker-0 is succeeded, 2 workers are running, 1 worker is failed",
-			tfJob:                   testutil.NewTFJobWithSuccessPolicy(4, 0, tfv1.SuccessPolicyAllWorkers),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        0,
-			expectedFailedWorker:    1,
-			expectedSucceededWorker: 1,
-			expectedActiveWorker:    2,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        true,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "Chief is running, workers are failed",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    4,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     1,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "Chief is running, workers are succeeded",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 4,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     1,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRunning,
-		},
-		testCase{
-			description:             "Chief is running, a PS is failed",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        1,
-			expectedSucceededPS:     0,
-			expectedActivePS:        1,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 4,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     1,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "Chief is failed, workers are succeeded",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    0,
-			expectedSucceededWorker: 4,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     1,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobFailed,
-		},
-		testCase{
-			description:             "Chief is succeeded, workers are failed",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    4,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     0,
-			expectedSucceededChief:  1,
-			expectedActiveChief:     0,
-			restart:                 false,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobSucceeded,
-		},
-		testCase{
-			description:             "Chief is failed and restarting",
-			tfJob:                   testutil.NewTFJobWithChief(4, 2),
-			expectedFailedPS:        0,
-			expectedSucceededPS:     0,
-			expectedActivePS:        2,
-			expectedFailedWorker:    4,
-			expectedSucceededWorker: 0,
-			expectedActiveWorker:    0,
-			expectedFailedChief:     1,
-			expectedSucceededChief:  0,
-			expectedActiveChief:     0,
-			restart:                 true,
-			worker0Completed:        false,
-			expectedType:            commonv1.JobRestarting,
-		},
-	}
-
-	for i, c := range testCases {
-		// Prepare the clientset and controller for the test.
-		kubeClientSet := kubeclientset.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &v1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		// Prepare the volcano clientset and controller for the test.
-		volcanoClientSet := volcanoclient.NewForConfigOrDie(&rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &batchv1beta1.SchemeGroupVersion,
-			},
-		},
-		)
-
-		config := &rest.Config{
-			Host: "",
-			ContentConfig: rest.ContentConfig{
-				GroupVersion: &tfv1.GroupVersion,
-			},
-		}
-		tfJobClientSet := tfjobclientset.NewForConfigOrDie(config)
-		ctr, kubeInformerFactory, _ := newTFController(config, kubeClientSet,
-			volcanoClientSet, tfJobClientSet, 0, options.ServerOption{})
-		fakePodControl := &control.FakePodControl{}
-		ctr.PodControl = fakePodControl
-		ctr.Recorder = &record.FakeRecorder{}
-		ctr.tfJobInformerSynced = testutil.AlwaysReady
-		ctr.PodInformerSynced = testutil.AlwaysReady
-		ctr.ServiceInformerSynced = testutil.AlwaysReady
-		tfJobIndexer := ctr.tfJobInformer.GetIndexer()
-		podIndexer := kubeInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-
-		stopCh := make(chan struct{})
-		run := func(<-chan struct{}) {
-			if err := ctr.Run(testutil.ThreadCount, stopCh); err != nil {
-				t.Errorf("Failed to run the controller: %v", err)
-			}
-		}
-		go run(stopCh)
-
-		unstructured, err := testutil.ConvertTFJobToUnstructured(c.tfJob)
-		if err != nil {
-			t.Errorf("Failed to convert the TFJob to Unstructured: %v", err)
-		}
-
-		if err := tfJobIndexer.Add(unstructured); err != nil {
-			t.Errorf("Failed to add tfjob to tfJobIndexer: %v", err)
-		}
-
-		initializeReplicaStatuses(&c.tfJob.Status, tfv1.TFReplicaTypeWorker)
-		initializeReplicaStatuses(&c.tfJob.Status, tfv1.TFReplicaTypeChief)
-		initializeReplicaStatuses(&c.tfJob.Status, tfv1.TFReplicaTypePS)
-
-		setStatusForTest(c.tfJob, tfv1.TFReplicaTypePS, c.expectedFailedPS, c.expectedSucceededPS, c.expectedActivePS, c.restart, c.worker0Completed, podIndexer, t)
-		setStatusForTest(c.tfJob, tfv1.TFReplicaTypeWorker, c.expectedFailedWorker, c.expectedSucceededWorker, c.expectedActiveWorker, c.restart, c.worker0Completed, podIndexer, t)
-		setStatusForTest(c.tfJob, tfv1.TFReplicaTypeChief, c.expectedFailedChief, c.expectedSucceededChief, c.expectedActiveChief, c.restart, c.worker0Completed, podIndexer, t)
-
-		// err = ctr.UpdateJobStatus(c.tfJob, c.tfJob.Spec.TFReplicaSpecs, &c.tfJob.Status)
-		// if err != nil {
-		// 	t.Errorf("%s: Expected error %v to be nil", c.description, err)
-		// }
-		_ = ctr.ReconcileJobs(c.tfJob, c.tfJob.Spec.TFReplicaSpecs, c.tfJob.Status, &c.tfJob.Spec.RunPolicy)
-
-		// Test filterOutCondition
-		filterOutConditionTest(c.tfJob.Status, t)
-
-		found := false
-		for _, condition := range c.tfJob.Status.Conditions {
-			if condition.Type == c.expectedType {
-				found = true
-			}
-		}
-		if !found {
-			t.Errorf("Case[%d]%s: Condition %s is not found", i, c.description, c.expectedType)
-		}
-	}
-}
-
-func setStatusForTest(tfJob *tfv1.TFJob, rtype commonv1.ReplicaType, failed, succeeded, active int32, restart bool, worker0Completed bool, podIndexer cache.Indexer, t *testing.T) {
-	if restart == true {
-		tfJob.Spec.TFReplicaSpecs[rtype].RestartPolicy = commonv1.RestartPolicyExitCode
-	}
-
-	var typ string
-	switch rtype {
-	case tfv1.TFReplicaTypeWorker:
-		typ = testutil.LabelWorker
-	case tfv1.TFReplicaTypePS:
-		typ = testutil.LabelPS
-	case tfv1.TFReplicaTypeChief:
-		typ = testutil.LabelChief
-	default:
-		fmt.Println("wrong type")
-	}
-
-	var i int32
-	index := 0
-	for i = 0; i < succeeded; i++ {
-		pod := testutil.NewPod(tfJob, typ, index)
-		pod.Status.Phase = v1.PodSucceeded
-		if worker0Completed == true && rtype == tfv1.TFReplicaTypeWorker && index == 0 {
-			pod.Status.ContainerStatuses = []v1.ContainerStatus{
-				{
-					Name: tfv1.DefaultContainerName,
-					State: corev1.ContainerState{
-						Terminated: &corev1.ContainerStateTerminated{
-							ExitCode: int32(0), // exit with 0
-						},
-					},
-				},
-			}
-		}
-		if err := podIndexer.Add(pod); err != nil {
-			t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-		}
-		updateJobReplicaStatuses(&tfJob.Status, rtype, pod)
-
-		index++
-	}
-	for i = 0; i < failed; i++ {
-		pod := testutil.NewPod(tfJob, typ, index)
-		pod.Status.Phase = v1.PodFailed
-		if restart == true {
-			if pod.Status.ContainerStatuses == nil {
-				pod.Status.ContainerStatuses = []v1.ContainerStatus{
-					{
-						Name: tfv1.DefaultContainerName,
-						State: corev1.ContainerState{
-							Terminated: &corev1.ContainerStateTerminated{
-								ExitCode: int32(130), // 130 is a retryable code
-							},
-						},
-					},
-				}
-			}
-		}
-		if err := podIndexer.Add(pod); err != nil {
-			t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-		}
-		updateJobReplicaStatuses(&tfJob.Status, rtype, pod)
-		index++
-	}
-	for i = 0; i < active; i++ {
-		pod := testutil.NewPod(tfJob, typ, index)
-		pod.Status.Phase = v1.PodRunning
-		if err := podIndexer.Add(pod); err != nil {
-			t.Errorf("%s: unexpected error when adding pod %v", tfJob.Name, err)
-		}
-		updateJobReplicaStatuses(&tfJob.Status, rtype, pod)
-		index++
-	}
-}
-
-func filterOutConditionTest(status commonv1.JobStatus, t *testing.T) {
-	flag := isFailed(status) || isSucceeded(status)
-	for _, condition := range status.Conditions {
-		if flag && condition.Type == commonv1.JobRunning && condition.Status == v1.ConditionTrue {
-			t.Error("Error condition status when succeeded or failed")
-		}
-	}
-}
diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go
index 2c8f93da1e..379f8bb407 100644
--- a/pkg/controller.v1/tensorflow/tfjob_controller.go
+++ b/pkg/controller.v1/tensorflow/tfjob_controller.go
@@ -21,55 +21,76 @@ import (
 	"strings"
 	"time"
 
-	"github.com/kubeflow/tf-operator/pkg/apis/tensorflow/validation"
-
-	"sigs.k8s.io/controller-runtime/pkg/event"
-
-	"sigs.k8s.io/controller-runtime/pkg/controller"
-	"sigs.k8s.io/controller-runtime/pkg/handler"
-	"sigs.k8s.io/controller-runtime/pkg/predicate"
-	"sigs.k8s.io/controller-runtime/pkg/source"
-
-	train_util "github.com/kubeflow/common/pkg/util/train"
-
+	"github.com/go-logr/logr"
 	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
-	commonutil "github.com/kubeflow/common/pkg/util"
-
-	"k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-
-	"github.com/kubeflow/common/pkg/controller.v1/expectation"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	kubeclientset "k8s.io/client-go/kubernetes"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
-	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
-
-	"k8s.io/apimachinery/pkg/types"
-
 	"github.com/kubeflow/common/pkg/controller.v1/common"
 	"github.com/kubeflow/common/pkg/controller.v1/control"
+	"github.com/kubeflow/common/pkg/controller.v1/expectation"
+	commonutil "github.com/kubeflow/common/pkg/util"
+	train_util "github.com/kubeflow/common/pkg/util/train"
+	tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
+	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
+	"github.com/kubeflow/tf-operator/pkg/apis/tensorflow/validation"
+	trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common"
+	"github.com/kubeflow/tf-operator/pkg/common/util"
+	"github.com/sirupsen/logrus"
 	corev1 "k8s.io/api/core/v1"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-
-	"k8s.io/client-go/tools/record"
-
-	"github.com/sirupsen/logrus"
-	"sigs.k8s.io/controller-runtime/pkg/log"
-
-	"github.com/go-logr/logr"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	kubeclientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/record"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-
-	tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common"
-	"github.com/kubeflow/tf-operator/pkg/common/util"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/source"
+	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
 )
 
-var (
-	defaultCleanPodPolicy = commonv1.CleanPodPolicyNone
+const (
+	// tfJobSucceededReason is added in a tfjob when it is succeeded.
+	tfJobSucceededReason = "TFJobSucceeded"
+	// tfJobRunningReason is added in a tfjob when it is running.
+	tfJobRunningReason = "TFJobRunning"
+	// tfJobFailedReason is added in a tfjob when it is failed.
+	tfJobFailedReason = "TFJobFailed"
+	// tfJobRestarting is added in a tfjob when it is restarting.
+	tfJobRestartingReason = "TFJobRestarting"
+
+	FailedDeleteJobReason     = "FailedDeleteJob"
+	SuccessfulDeleteJobReason = "SuccessfulDeleteJob"
+
+	controllerName = "tfjob-controller"
+
+	// labels for pods and servers.
+	tfReplicaTypeLabel  = "replica-type"
+	tfReplicaIndexLabel = "replica-index"
+	// volcanoTaskSpecKey task spec key used in pod annotation when EnableGangScheduling is true
+	volcanoTaskSpecKey = "volcano.sh/task-spec"
+
+	// gang scheduler name.
+	gangSchedulerName = "volcano"
+	// tfConfig is the environment variable name of TensorFlow cluster spec.
+	tfConfig = "TF_CONFIG"
+	// exitedWithCodeReason is the normal reason when the pod is exited because of the exit code.
+	exitedWithCodeReason = "ExitedWithCode"
+	// podTemplateRestartPolicyReason is the warning reason when the restart
+	// policy is set in pod template.
+	podTemplateRestartPolicyReason = "SettedPodTemplateRestartPolicy"
+	// podTemplateSchedulerNameReason is the warning reason when other scheduler name is set
+	// in pod templates with gang-scheduling enabled
+	podTemplateSchedulerNameReason = "SettedPodTemplateSchedulerName"
+	// gangSchedulingPodGroupAnnotation is the annotation key used by batch schedulers
+	gangSchedulingPodGroupAnnotation = "scheduling.k8s.io/group-name"
 )
 
 func NewReconciler(mgr manager.Manager) *TFJobReconciler {
@@ -356,7 +377,7 @@ func (r *TFJobReconciler) UpdateJobStatus(job interface{}, replicas map[commonv1
 		return fmt.Errorf("%v is not a type of TFJob", tfJob)
 	}
 
-	tfJobKey, err := KeyFunc(tfJob)
+	tfJobKey, err := common.KeyFunc(tfJob)
 	if err != nil {
 		utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfJob, err))
 		return err
@@ -407,7 +428,7 @@ func (r *TFJobReconciler) UpdateJobStatus(job interface{}, replicas map[commonv1
 
 		// If the TFJob contains Chief or Master spec, then we will update the status
 		// according to the Chief/Master spec.
-		if ContainChieforMasterSpec(tfJob.Spec.TFReplicaSpecs) {
+		if ContainsChiefOrMasterSpec(tfJob.Spec.TFReplicaSpecs) {
 			if tensorflowv1.IsChieforMaster(rtype) {
 				if running > 0 {
 					msg := fmt.Sprintf("TFJob %s/%s is running.",
@@ -574,28 +595,24 @@ func (r *TFJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.Po
 	return nil
 }
 
-// Same as (tc *TFController) GetDefaultContainerName(..) in controller.go
 func (r *TFJobReconciler) GetDefaultContainerName() string {
 	return tensorflowv1.DefaultContainerName
 }
 
-// Same as (tc *TFController) GetDefaultContainerPortName(..) in controller.go
 func (r *TFJobReconciler) GetDefaultContainerPortName() string {
 	return tensorflowv1.DefaultPortName
 }
 
-// Same as (tc *TFController) IsMasterRole(..) in controller.go
 func (r *TFJobReconciler) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
 	rtype commonv1.ReplicaType, index int) bool {
-	if ContainChieforMasterSpec(replicas) {
+	if ContainsChiefOrMasterSpec(replicas) {
 		return rtype == tensorflowv1.TFReplicaTypeChief || rtype == tensorflowv1.TFReplicaTypeMaster
 	}
 	// else check if it is worker with index 0
 	return rtype == tensorflowv1.TFReplicaTypeWorker && index == 0
 }
 
-// Following are replicatef from TFController
-// IsWorker0Completed return true if pod of worker0 succeeded and exited with 0
+// IsWorker0Completed returns true if pod of worker0 succeeded and exited with 0
 func (r *TFJobReconciler) IsWorker0Completed(tfjob *tensorflowv1.TFJob, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) (bool, error) {
 	worker0Completed := false
 	_, ok := replicas[tensorflowv1.TFReplicaTypeWorker]
@@ -746,7 +763,7 @@ func (r *TFJobReconciler) ReconcilePods(
 func (r *TFJobReconciler) createNewPod(tfjob *tfv1.TFJob, rt, index string, spec *commonv1.ReplicaSpec, masterRole bool,
 	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error {
 
-	tfjobKey, err := KeyFunc(tfjob)
+	tfjobKey, err := common.KeyFunc(tfjob)
 	if err != nil {
 		utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfjob, err))
 		return err
diff --git a/pkg/controller.v1/tensorflow/util.go b/pkg/controller.v1/tensorflow/util.go
index c7e86c1049..93aa781544 100644
--- a/pkg/controller.v1/tensorflow/util.go
+++ b/pkg/controller.v1/tensorflow/util.go
@@ -15,14 +15,9 @@
 package tensorflow
 
 import (
-	"fmt"
-
 	commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"
 	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-)
-
-var (
-	errPortNotFound = fmt.Errorf("failed to found the port")
+	corev1 "k8s.io/api/core/v1"
 )
 
 // GetPortFromTFJob gets the port of tensorflow container.
@@ -41,8 +36,8 @@ func GetPortFromTFJob(tfJob *tfv1.TFJob, rtype commonv1.ReplicaType) (int32, err
 	return tfv1.DefaultPort, nil
 }
 
-// ContainChieforMasterSpec returns true if the tfjob contains chief or master spec.
-func ContainChieforMasterSpec(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) bool {
+// ContainsChiefOrMasterSpec returns true if the tfjob contains chief or master spec.
+func ContainsChiefOrMasterSpec(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) bool {
 	if _, ok := replicas[tfv1.TFReplicaTypeChief]; ok {
 		return true
 	} else if _, ok := replicas[tfv1.TFReplicaTypeMaster]; ok {
@@ -50,3 +45,74 @@ func ContainChieforMasterSpec(replicas map[commonv1.ReplicaType]*commonv1.Replic
 	}
 	return false
 }
+
+// originally from pkg/controller.v1/tensorflow/pod.go (deleted)
+func getContainerExitCode(pod *corev1.Pod) int32 {
+	var exitCode int32 = 0xbeef // magic number
+	for _, status := range pod.Status.ContainerStatuses {
+		state := status.State
+		if status.Name == tfv1.DefaultContainerName && state.Terminated != nil {
+			exitCode = state.Terminated.ExitCode
+		}
+	}
+	return exitCode
+}
+
+// originally from pkg/controller.v1/tensorflow/pod.go (deleted)
+func setRestartPolicy(podTemplateSpec *corev1.PodTemplateSpec, spec *commonv1.ReplicaSpec) {
+	// This is necessary since restartPolicyExitCode is not supported in v1.PodTemplateSpec
+	if spec.RestartPolicy == commonv1.RestartPolicyExitCode {
+		podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicyNever
+	} else {
+		podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicy(spec.RestartPolicy)
+	}
+}
+
+// isDistributed returns if the TFJob is a distributed training job.
+// Ref https://github.com/kubeflow/tf-operator/issues/1078.
+// originally from pkg/controller.v1/tensorflow/pod.go (deleted)
+func isDistributed(tfjob *tfv1.TFJob) bool {
+	replicas := tfjob.Spec.TFReplicaSpecs
+	distributionCount := 0
+	allTypes := []commonv1.ReplicaType{
+		tfv1.TFReplicaTypeChief,
+		tfv1.TFReplicaTypeEval,
+		tfv1.TFReplicaTypeMaster,
+		tfv1.TFReplicaTypePS,
+		tfv1.TFReplicaTypeWorker,
+	}
+	// Check if there is only one replica.
+	for _, typ := range allTypes {
+		if replicas[typ] != nil {
+			if replicas[typ].Replicas == nil {
+				distributionCount++
+			} else {
+				distributionCount += int(*replicas[typ].Replicas)
+			}
+		}
+	}
+	return distributionCount != 1
+}
+
+// initializeReplicaStatuses initializes the ReplicaStatuses for replica.
+// originally from pkg/controller.v1/tensorflow/status.go (deleted)
+func initializeReplicaStatuses(jobStatus *commonv1.JobStatus, rtype commonv1.ReplicaType) {
+	if jobStatus.ReplicaStatuses == nil {
+		jobStatus.ReplicaStatuses = make(map[commonv1.ReplicaType]*commonv1.ReplicaStatus)
+	}
+
+	jobStatus.ReplicaStatuses[rtype] = &commonv1.ReplicaStatus{}
+}
+
+// updateJobReplicaStatuses updates the JobReplicaStatuses according to the pod.
+// originally from pkg/controller.v1/tensorflow/status.go (deleted)
+func updateJobReplicaStatuses(jobStatus *commonv1.JobStatus, rtype commonv1.ReplicaType, pod *corev1.Pod) {
+	switch pod.Status.Phase {
+	case corev1.PodRunning:
+		jobStatus.ReplicaStatuses[rtype].Active++
+	case corev1.PodSucceeded:
+		jobStatus.ReplicaStatuses[rtype].Succeeded++
+	case corev1.PodFailed:
+		jobStatus.ReplicaStatuses[rtype].Failed++
+	}
+}
diff --git a/pkg/controller.v1/tensorflow/util_test.go b/pkg/controller.v1/tensorflow/util_test.go
deleted file mode 100644
index 938ae7475b..0000000000
--- a/pkg/controller.v1/tensorflow/util_test.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2018 The Kubeflow Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tensorflow
-
-import (
-	"testing"
-
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/types"
-
-	tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
-	"github.com/kubeflow/tf-operator/pkg/common/util/v1/testutil"
-)
-
-func TestGenOwnerReference(t *testing.T) {
-	testName := "test-tfjob"
-	testUID := types.UID("test-UID")
-	tfJob := &tfv1.TFJob{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: testName,
-			UID:  testUID,
-		},
-	}
-
-	ref := testutil.GenOwnerReference(tfJob)
-	if ref.UID != testUID {
-		t.Errorf("Expected UID %s, got %s", testUID, ref.UID)
-	}
-	if ref.Name != testName {
-		t.Errorf("Expected Name %s, got %s", testName, ref.Name)
-	}
-	if ref.APIVersion != tfv1.GroupVersion.Version {
-		t.Errorf("Expected APIVersion %s, got %s", tfv1.GroupVersion.String(), ref.APIVersion)
-	}
-}
-
-func TestGenLabels(t *testing.T) {
-	testKey := "test/key"
-	expctedKey := "test-key"
-
-	labels := testutil.GenLabels(testKey)
-	jobNamelabel := testutil.JobNameLabel
-
-	if labels[jobNamelabel] != expctedKey {
-		t.Errorf("Expected %s %s, got %s", jobNamelabel, expctedKey, jobNamelabel)
-	}
-	if labels[labelGroupName] != tfv1.GroupVersion.Group {
-		t.Errorf("Expected %s %s, got %s", labelGroupName, tfv1.GroupVersion.Group, labels[labelGroupName])
-	}
-}
-
-func TestConvertTFJobToUnstructured(t *testing.T) {
-	testName := "test-tfjob"
-	testUID := types.UID("test-UID")
-	tfJob := &tfv1.TFJob{
-		TypeMeta: metav1.TypeMeta{
-			Kind: tfv1.Kind,
-		},
-		ObjectMeta: metav1.ObjectMeta{
-			Name: testName,
-			UID:  testUID,
-		},
-	}
-
-	_, err := testutil.ConvertTFJobToUnstructured(tfJob)
-	if err != nil {
-		t.Errorf("Expected error to be nil while got %v", err)
-	}
-}
diff --git a/pkg/controller.v1/xgboost/xgboostjob_controller.go b/pkg/controller.v1/xgboost/xgboostjob_controller.go
index 97f60bf97e..d170bfa3f5 100644
--- a/pkg/controller.v1/xgboost/xgboostjob_controller.go
+++ b/pkg/controller.v1/xgboost/xgboostjob_controller.go
@@ -61,7 +61,7 @@ import (
 )
 
 const (
-	controllerName = "xgboostjob-operator"
+	controllerName = "xgboostjob-controller"
 
 	// Reasons for job events.
 	FailedDeleteJobReason     = "FailedDeleteJob"
diff --git a/py/kubeflow/tf_operator/release.py b/py/kubeflow/tf_operator/release.py
index df3b40bfbc..916349ebcf 100755
--- a/py/kubeflow/tf_operator/release.py
+++ b/py/kubeflow/tf_operator/release.py
@@ -145,11 +145,11 @@ def build_operator_image(root_dir,
   commit = build_and_push_image.GetGitHash(root_dir)
 
   targets = [
-    "github.com/kubeflow/tf-operator/cmd/tf-operator.v1",
+    "github.com/kubeflow/tf-operator/cmd/training-operator.v1",
   ]
   for t in targets:
     if t in [
-        "github.com/kubeflow/tf-operator/cmd/tf-operator.v1"
+        "github.com/kubeflow/tf-operator/cmd/training-operator.v1"
     ]:
       util.run([
         "go", "install", "-ldflags",
@@ -169,7 +169,7 @@ def build_operator_image(root_dir,
   # List of paths to copy relative to root.
   sources = [
     "build/images/tf_operator/Dockerfile", "examples/tf_sample/tf_smoke.py",
-    os.path.join(go_path, bin_path, "tf-operator.v1"),
+    os.path.join(go_path, bin_path, "training-operator.v1"),
     "cmd", "pkg", "third_party", "vendor", "go.mod", "go.sum"
   ]