Skip to content

Commit

Permalink
*: Use k8s.io/kubernetes/controller
Browse files Browse the repository at this point in the history
Signed-off-by: Ce Gao <[email protected]>
  • Loading branch information
gaocegege committed May 11, 2018
1 parent 84ed872 commit 4804a2a
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 866 deletions.
29 changes: 18 additions & 11 deletions pkg/controller.v2/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/controller"

tfv1alpha2 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1alpha2"
tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
Expand All @@ -58,14 +59,20 @@ const (
defaultPortStr = "2222"
)

// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = tfv1alpha2.SchemeGroupVersion.WithKind("TFJob")
var (
// controllerKind contains the schema.GroupVersionKind for this controller type.
controllerKind = tfv1alpha2.SchemeGroupVersion.WithKind("TFJob")
groupVersionKind = schema.GroupVersionKind{
Group: tfv1alpha2.GroupName,
Version: tfv1alpha2.GroupVersion,
Kind: tfv1alpha2.TFJobResourceKind,
}

var groupVersionKind = schema.GroupVersionKind{
Group: tfv1alpha2.GroupName,
Version: tfv1alpha2.GroupVersion,
Kind: tfv1alpha2.TFJobResourceKind,
}
// KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc.
// IndexerInformer uses a delta queue, therefore for deletes we have to use this
// key function but it should be just fine for non delete events.
KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
)

// TFJobControllerConfiguration contains configuration of tf-operator.
// DefaultTimerConfig is the suggested tf-operator configuration for production.
Expand All @@ -88,7 +95,7 @@ type TFJobController struct {
config TFJobControllerConfiguration

// podControl is used to add or delete pods.
podControl PodControlInterface
podControl controller.PodControlInterface

// serviceControl is used to add or delete services.
serviceControl ServiceControlInterface
Expand Down Expand Up @@ -140,7 +147,7 @@ type TFJobController struct {
// - "tf-operator/tfjob-abc/ps/pods", expects 2 adds.
// - "tf-operator/tfjob-abc/worker/services", expects 4 adds.
// - "tf-operator/tfjob-abc/worker/pods", expects 4 adds.
expectations ControllerExpectationsInterface
expectations controller.ControllerExpectationsInterface

// workQueue is a rate limited work queue. This is used to queue work to be
// processed instead of performing it as soon as a change happens. This
Expand Down Expand Up @@ -169,7 +176,7 @@ func NewTFJobController(
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClientSet.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: controllerName})

realPodControl := RealPodControl{
realPodControl := controller.RealPodControl{
KubeClient: kubeClientSet,
Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "tfjob-controller"}),
}
Expand All @@ -185,7 +192,7 @@ func NewTFJobController(
serviceControl: realServiceControl,
kubeClientSet: kubeClientSet,
tfJobClientSet: tfJobClientSet,
expectations: NewControllerExpectations(),
expectations: controller.NewControllerExpectations(),
workQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "tfjobs"),
recorder: recorder,
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/controller.v2/controller_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/kubernetes/pkg/controller"

tfv1alpha2 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1alpha2"
)
Expand Down Expand Up @@ -197,7 +198,7 @@ func (tc *TFJobController) getPodsForTFJob(tfjob *tfv1alpha2.TFJob) ([]*v1.Pod,
}
return fresh, nil
})
cm := NewPodControllerRefManager(tc.podControl, tfjob, selector, controllerKind, canAdoptFunc)
cm := controller.NewPodControllerRefManager(tc.podControl, tfjob, selector, controllerKind, canAdoptFunc)
return cm.ClaimPods(pods)
}

Expand Down
229 changes: 5 additions & 224 deletions pkg/controller.v2/controller_ref_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,238 +20,19 @@ package controller

import (
"fmt"
"sync"

"github.com/golang/glog"
// apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
// extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/pkg/controller"
)

type BaseControllerRefManager struct {
Controller metav1.Object
Selector labels.Selector

canAdoptErr error
canAdoptOnce sync.Once
CanAdoptFunc func() error
}

func (m *BaseControllerRefManager) CanAdopt() error {
m.canAdoptOnce.Do(func() {
if m.CanAdoptFunc != nil {
m.canAdoptErr = m.CanAdoptFunc()
}
})
return m.canAdoptErr
}

// ClaimObject tries to take ownership of an object for this controller.
//
// It will reconcile the following:
// * Adopt orphans if the match function returns true.
// * Release owned objects if the match function returns false.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The returned boolean indicates whether you now
// own the object.
//
// No reconciliation will be attempted if the controller is being deleted.
func (m *BaseControllerRefManager) ClaimObject(obj metav1.Object, match func(metav1.Object) bool, adopt, release func(metav1.Object) error) (bool, error) {
controllerRef := metav1.GetControllerOf(obj)
if controllerRef != nil {
if controllerRef.UID != m.Controller.GetUID() {
// Owned by someone else. Ignore.
return false, nil
}
if match(obj) {
// We already own it and the selector matches.
// Return true (successfully claimed) before checking deletion timestamp.
// We're still allowed to claim things we already own while being deleted
// because doing so requires taking no actions.
return true, nil
}
// Owned by us but selector doesn't match.
// Try to release, unless we're being deleted.
if m.Controller.GetDeletionTimestamp() != nil {
return false, nil
}
if err := release(obj); err != nil {
// If the pod no longer exists, ignore the error.
if errors.IsNotFound(err) {
return false, nil
}
// Either someone else released it, or there was a transient error.
// The controller should requeue and try again if it's still stale.
return false, err
}
// Successfully released.
return false, nil
}

// It's an orphan.
if m.Controller.GetDeletionTimestamp() != nil || !match(obj) {
// Ignore if we're being deleted or selector doesn't match.
return false, nil
}
if obj.GetDeletionTimestamp() != nil {
// Ignore if the object is being deleted
return false, nil
}
// Selector matches. Try to adopt.
if err := adopt(obj); err != nil {
// If the pod no longer exists, ignore the error.
if errors.IsNotFound(err) {
return false, nil
}
// Either someone else claimed it first, or there was a transient error.
// The controller should requeue and try again if it's still orphaned.
return false, err
}
// Successfully adopted.
return true, nil
}

type PodControllerRefManager struct {
BaseControllerRefManager
controllerKind schema.GroupVersionKind
podControl PodControlInterface
}

// NewPodControllerRefManager returns a PodControllerRefManager that exposes
// methods to manage the controllerRef of pods.
//
// The CanAdopt() function can be used to perform a potentially expensive check
// (such as a live GET from the API server) prior to the first adoption.
// It will only be called (at most once) if an adoption is actually attempted.
// If CanAdopt() returns a non-nil error, all adoptions will fail.
//
// NOTE: Once CanAdopt() is called, it will not be called again by the same
// PodControllerRefManager instance. Create a new instance if it makes
// sense to check CanAdopt() again (e.g. in a different sync pass).
func NewPodControllerRefManager(
podControl PodControlInterface,
controller metav1.Object,
selector labels.Selector,
controllerKind schema.GroupVersionKind,
canAdopt func() error,
) *PodControllerRefManager {
return &PodControllerRefManager{
BaseControllerRefManager: BaseControllerRefManager{
Controller: controller,
Selector: selector,
CanAdoptFunc: canAdopt,
},
controllerKind: controllerKind,
podControl: podControl,
}
}

// ClaimPods tries to take ownership of a list of Pods.
//
// It will reconcile the following:
// * Adopt orphans if the selector matches.
// * Release owned objects if the selector no longer matches.
//
// Optional: If one or more filters are specified, a Pod will only be claimed if
// all filters return true.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The list of Pods that you now own is returned.
func (m *PodControllerRefManager) ClaimPods(pods []*v1.Pod, filters ...func(*v1.Pod) bool) ([]*v1.Pod, error) {
var claimed []*v1.Pod
var errlist []error

match := func(obj metav1.Object) bool {
pod := obj.(*v1.Pod)
// Check selector first so filters only run on potentially matching Pods.
if !m.Selector.Matches(labels.Set(pod.Labels)) {
return false
}
for _, filter := range filters {
if !filter(pod) {
return false
}
}
return true
}
adopt := func(obj metav1.Object) error {
return m.AdoptPod(obj.(*v1.Pod))
}
release := func(obj metav1.Object) error {
return m.ReleasePod(obj.(*v1.Pod))
}

for _, pod := range pods {
ok, err := m.ClaimObject(pod, match, adopt, release)
if err != nil {
errlist = append(errlist, err)
continue
}
if ok {
claimed = append(claimed, pod)
}
}
return claimed, utilerrors.NewAggregate(errlist)
}

// AdoptPod sends a patch to take control of the pod. It returns the error if
// the patching fails.
func (m *PodControllerRefManager) AdoptPod(pod *v1.Pod) error {
if err := m.CanAdopt(); err != nil {
return fmt.Errorf("can't adopt Pod %v/%v (%v): %v", pod.Namespace, pod.Name, pod.UID, err)
}
// Note that ValidateOwnerReferences() will reject this patch if another
// OwnerReference exists with controller=true.
addControllerPatch := fmt.Sprintf(
`{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`,
m.controllerKind.GroupVersion(), m.controllerKind.Kind,
m.Controller.GetName(), m.Controller.GetUID(), pod.UID)
return m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(addControllerPatch))
}

// ReleasePod sends a patch to free the pod from the control of the controller.
// It returns the error if the patching fails. 404 and 422 errors are ignored.
func (m *PodControllerRefManager) ReleasePod(pod *v1.Pod) error {
glog.V(2).Infof("patching pod %s_%s to remove its controllerRef to %s/%s:%s",
pod.Namespace, pod.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName())
deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), pod.UID)
err := m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(deleteOwnerRefPatch))
if err != nil {
if errors.IsNotFound(err) {
// If the pod no longer exists, ignore it.
return nil
}
if errors.IsInvalid(err) {
// Invalid error will be returned in two cases: 1. the pod
// has no owner reference, 2. the uid of the pod doesn't
// match, which means the pod is deleted and then recreated.
// In both cases, the error can be ignored.

// TODO: If the pod has owner references, but none of them
// has the owner.UID, server will silently ignore the patch.
// Investigate why.
return nil
}
}
return err
}

type ServiceControllerRefManager struct {
BaseControllerRefManager
controller.BaseControllerRefManager

controllerKind schema.GroupVersionKind
serviceControl ServiceControlInterface
Expand All @@ -270,14 +51,14 @@ type ServiceControllerRefManager struct {
// sense to check canAdopt() again (e.g. in a different sync pass).
func NewServiceControllerRefManager(
serviceControl ServiceControlInterface,
controller metav1.Object,
ctr metav1.Object,
selector labels.Selector,
controllerKind schema.GroupVersionKind,
canAdopt func() error,
) *ServiceControllerRefManager {
return &ServiceControllerRefManager{
BaseControllerRefManager: BaseControllerRefManager{
Controller: controller,
BaseControllerRefManager: controller.BaseControllerRefManager{
Controller: ctr,
Selector: selector,
CanAdoptFunc: canAdopt,
},
Expand Down
Loading

0 comments on commit 4804a2a

Please sign in to comment.