Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/controller-manager/app/controller_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
finetunev1beta1 "github.com/DataTunerX/meta-server/api/finetune/v1beta1"
"github.com/go-logr/zapr"
"github.com/operator-framework/operator-lib/leader"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand All @@ -35,6 +36,7 @@ func init() {
utilruntime.Must(finetunev1beta1.AddToScheme(scheme))
utilruntime.Must(corev1beta1.AddToScheme(scheme))
utilruntime.Must(extensionv1beta1.AddToScheme(scheme))
utilruntime.Must(rayv1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ module github.com/DataTunerX/finetune-experiment-controller
go 1.19

require (
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2
github.com/go-logr/zapr v1.2.3
github.com/operator-framework/operator-lib v0.11.0
github.com/ray-project/kuberay/ray-operator v1.0.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.17.0
k8s.io/api v0.26.0
Expand All @@ -32,7 +33,7 @@ require (
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/uuid v1.1.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/josharian/intern v1.0.0 // indirect
Expand All @@ -59,10 +60,10 @@ require (
go.uber.org/multierr v1.10.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.15.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/sys v0.12.0 // indirect
golang.org/x/term v0.12.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
Expand Down
27 changes: 18 additions & 9 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3f
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17 h1:szsQx64N0bTO6qpCmD4V7Ne1AuF+y/KcRyyh7UE2SRQ=
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231113032938-bf87d14956b1 h1:WxEyoS9Dlkm2Yfcpn0sL0Gz/xfXdN0fdxb/dGYAQIqQ=
github.com/DataTunerX/meta-server v0.0.0-20231113032938-bf87d14956b1/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116063244-4b1d018072c0 h1:BJ6OqFz1ROHizgQ9eNWpWSCzMEe4PFLhCloBUsLrYa0=
github.com/DataTunerX/meta-server v0.0.0-20231116063244-4b1d018072c0/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116064242-ea7bb845394f h1:ivD0gAMQ0gWtJ1/xWeUqkOce0PEO2LXWfjAAGiPwTvw=
github.com/DataTunerX/meta-server v0.0.0-20231116064242-ea7bb845394f/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89 h1:czoBDPd42BBGiCREjfnaxG5BNcHk+9MnkemXAnG/bEw=
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2 h1:3mBAWDqYrWtDk9xvIHDG/dN5zGcliwJnyvpWHFHcC+A=
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2/go.mod h1:qL3DYjQa7av0QkZoFrycHbpXHGQfBNEDke8uv+FdDn4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down Expand Up @@ -177,8 +183,9 @@ github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLe
github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g=
Expand Down Expand Up @@ -281,6 +288,8 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo=
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
github.com/ray-project/kuberay/ray-operator v1.0.0 h1:i69nvbV7az2FG41VHQgxrmhD+SUl8ca+ek4RPbSE2Q0=
github.com/ray-project/kuberay/ray-operator v1.0.0/go.mod h1:7C7ebIkxtkmOX8w1iiLrKM1j4hkZs/Guzm3WdePk/yg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/sagikazarmark/locafero v0.3.0 h1:zT7VEGWC2DTflmccN/5T1etyKvxSxpHsjb9cJvm4SvQ=
Expand Down Expand Up @@ -416,8 +425,8 @@ golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qx
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down Expand Up @@ -487,12 +496,12 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
77 changes: 68 additions & 9 deletions internal/controller/finetune/finetuneexperiment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package finetune

import (
"context"
"fmt"
"time"

"github.com/DataTunerX/utility-server/logging"
"k8s.io/apimachinery/pkg/types"

"github.com/DataTunerX/finetune-experiment-controller/pkg/util/handlererr"
finetunev1beta1 "github.com/DataTunerX/meta-server/api/finetune/v1beta1"
Expand All @@ -39,10 +41,6 @@ type FinetuneExperimentReconciler struct {
Log logging.Logger
}

const (
finetuneFinalizer = "finetune.datatunerx.io/finalizer"
)

//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments/finalizers,verbs=update
Expand All @@ -60,25 +58,86 @@ func (r *FinetuneExperimentReconciler) Reconcile(ctx context.Context, req ctrl.R
}

if finetuneExperiment.GetDeletionTimestamp() != nil {
if controllerutil.ContainsFinalizer(finetuneExperiment, finetuneFinalizer) {
if controllerutil.ContainsFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer) {
// TODO: implement cleanup before the finalizer is removed.
controllerutil.RemoveFinalizer(finetuneExperiment, finetuneFinalizer)
controllerutil.RemoveFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer)
if err := r.Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Remove finalizer failed: %s/%s, Err: %v", req.Name, req.Namespace, err)
return handlererr.HandlerErr(err)
}
}
return handlererr.HandlerErr(nil)
}
if !controllerutil.ContainsFinalizer(finetuneExperiment, finetuneFinalizer) {
controllerutil.AddFinalizer(finetuneExperiment, finetuneFinalizer)
if !controllerutil.ContainsFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer) {
controllerutil.AddFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer)
err := r.Update(ctx, finetuneExperiment)
if err != nil {
r.Log.Errorf("Add finalizer failed: %s/%s, %v", req.Name, req.Namespace, err)
return handlererr.HandlerErr(err)
}
}
return ctrl.Result{}, nil

if finetuneExperiment.Spec.Pending {
finetuneExperiment.Status.State = finetunev1beta1.FinetuneExperimentPending
if err := r.Client.Status().Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Update fineExperiment %s/%s status failed", finetuneExperiment.Name, finetuneExperiment.Namespace)
return handlererr.HandlerErr(err)
}
return handlererr.HandlerErr(nil)
}

for i := range finetuneExperiment.Spec.FinetuneJobs {
finetuneJob := finetuneExperiment.Spec.FinetuneJobs[i]
if finetuneJob.Name == nil {
name := fmt.Sprintf("%s-%s", finetuneExperiment.Name, "finetunejob")
finetuneJob.Name = &name
}
finetuneJobInstance := &finetunev1beta1.FinetuneJob{}
finetuneJobInstance.Spec = finetuneJob.Spec
finetuneJobInstance.Name = *finetuneJob.Name
finetuneJobInstance.Namespace = finetuneExperiment.Namespace
if err := ctrl.SetControllerReference(finetuneExperiment, finetuneJobInstance, r.Scheme); err != nil {
r.Log.Errorf("SetControllerReference failed finetuneJob: %s/%s, owner finetuneExperiment: %s/%s, err: %v",
finetuneJobInstance.Name, finetuneJobInstance.Namespace, finetuneExperiment.Name, finetuneExperiment.Namespace, err)
return handlererr.HandlerErr(err)
}
if err := r.Client.Create(ctx, finetuneJobInstance); err != nil {
if !errors.IsAlreadyExists(err) {
r.Log.Errorf("Create finetuneJob %s/%s failed: %v", finetuneJobInstance.Name, finetuneJobInstance.Namespace, err)
return handlererr.HandlerErr(err)
}
}
existFinetuneJob := &finetunev1beta1.FinetuneJob{}
if err := r.Client.Get(ctx, types.NamespacedName{
Name: *finetuneJob.Name,
Namespace: finetuneExperiment.Namespace,
}, existFinetuneJob); err != nil {
r.Log.Errorf("Get finetuneJob failed: %v", err)
return handlererr.HandlerErr(err)
}
alreadyExists := false

// Check whether Status.JobsStatus already has an entry named existFinetuneJob.Name.
for _, jobStatus := range finetuneExperiment.Status.JobsStatus {
if jobStatus.Name == existFinetuneJob.Name {
alreadyExists = true
break
}
}
if !alreadyExists {
finetuneExperiment.Status.JobsStatus = append(finetuneExperiment.Status.JobsStatus, finetunev1beta1.FinetuneJobStatusSetting{
Name: existFinetuneJob.Name,
FinetuneJobStatus: existFinetuneJob.Status,
})
}

}
finetuneExperiment.Status.State = finetunev1beta1.FinetuneExperimentProcessing
if err := r.Client.Status().Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Update fineExperiment %s/%s status failed", finetuneExperiment.Name, finetuneExperiment.Namespace)
return handlererr.HandlerErr(err)
}
return handlererr.HandlerErr(nil)
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
Loading