Skip to content

Commit 35d7414

Browse files
Implement multicluster client (#391)
Change summary:
- Implement support for maintaining some CRDs in remote clusters while others are maintained in the home cluster.
- Abstract cluster/client selection away in a multicluster client, which implements the client.Client interface and is passed to controllers as usual.
- Provide a custom controller builder which watches each resource in the correct cluster.
- Provide a guide on how to test a remote cluster setup with kind.
1 parent 66de023 commit 35d7414

37 files changed

+988
-270
lines changed

Tiltfile

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ if not os.getenv('TILT_VALUES_PATH'):
1717
fail("TILT_VALUES_PATH is not set.")
1818
if not os.path.exists(os.getenv('TILT_VALUES_PATH')):
1919
fail("TILT_VALUES_PATH "+ os.getenv('TILT_VALUES_PATH') + " does not exist.")
20-
tilt_values = os.getenv('TILT_VALUES_PATH')
20+
tilt_values = [os.getenv('TILT_VALUES_PATH')]
21+
22+
tilt_overrides = os.getenv('TILT_OVERRIDES_PATH')
23+
if tilt_overrides and os.path.exists(tilt_overrides):
24+
tilt_values.append(tilt_overrides)
2125

2226
load('ext://helm_resource', 'helm_resource', 'helm_repo')
2327
helm_repo(
@@ -106,7 +110,7 @@ k8s_yaml(helm('./helm/bundles/cortex-crds', name='cortex-crds', set=[
106110

107111
if 'nova' in ACTIVE_DEPLOYMENTS:
108112
print("Activating Cortex Nova bundle")
109-
k8s_yaml(helm('./helm/bundles/cortex-nova', name='cortex-nova', values=[tilt_values]))
113+
k8s_yaml(helm('./helm/bundles/cortex-nova', name='cortex-nova', values=tilt_values))
110114
k8s_resource('cortex-nova-postgresql', labels=['Cortex-Nova'], port_forwards=[
111115
port_forward(8000, 5432),
112116
])
@@ -125,7 +129,7 @@ if 'nova' in ACTIVE_DEPLOYMENTS:
125129

126130
if 'manila' in ACTIVE_DEPLOYMENTS:
127131
print("Activating Cortex Manila bundle")
128-
k8s_yaml(helm('./helm/bundles/cortex-manila', name='cortex-manila', values=[tilt_values]))
132+
k8s_yaml(helm('./helm/bundles/cortex-manila', name='cortex-manila', values=tilt_values))
129133
k8s_resource('cortex-manila-postgresql', labels=['Cortex-Manila'], port_forwards=[
130134
port_forward(8002, 5432),
131135
])
@@ -142,7 +146,7 @@ if 'manila' in ACTIVE_DEPLOYMENTS:
142146
)
143147

144148
if 'cinder' in ACTIVE_DEPLOYMENTS:
145-
k8s_yaml(helm('./helm/bundles/cortex-cinder', name='cortex-cinder', values=[tilt_values]))
149+
k8s_yaml(helm('./helm/bundles/cortex-cinder', name='cortex-cinder', values=tilt_values))
146150
k8s_resource('cortex-cinder-postgresql', labels=['Cortex-Cinder'], port_forwards=[
147151
port_forward(8004, 5432),
148152
])
@@ -160,7 +164,7 @@ if 'cinder' in ACTIVE_DEPLOYMENTS:
160164

161165
if 'ironcore' in ACTIVE_DEPLOYMENTS:
162166
print("Activating Cortex IronCore bundle")
163-
k8s_yaml(helm('./helm/bundles/cortex-ironcore', name='cortex-ironcore', values=[tilt_values]))
167+
k8s_yaml(helm('./helm/bundles/cortex-ironcore', name='cortex-ironcore', values=tilt_values))
164168
k8s_resource('cortex-ironcore-controller-manager', labels=['Cortex-IronCore'])
165169
# Deploy resources in machines/samples
166170
k8s_yaml('samples/ironcore/machinepool.yaml')

api/v1alpha1/datasource_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,9 @@ type DatasourceList struct {
293293
Items []Datasource `json:"items"`
294294
}
295295

296+
// URI returns the fixed identifier "datasources.cortex.cloud/v1alpha1" for
// Datasource objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Datasource) URI() string { return "datasources.cortex.cloud/v1alpha1" }
297+
// URI returns the fixed identifier "datasources.cortex.cloud/v1alpha1", the
// same value that Datasource.URI returns.
func (*DatasourceList) URI() string { return "datasources.cortex.cloud/v1alpha1" }
298+
296299
// init registers the Datasource and DatasourceList types with SchemeBuilder
// during package initialization.
func init() {
297300
SchemeBuilder.Register(&Datasource{}, &DatasourceList{})
298301
}

api/v1alpha1/decision_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ type DecisionList struct {
155155
Items []Decision `json:"items"`
156156
}
157157

158+
// URI returns the fixed identifier "decisions.cortex.cloud/v1alpha1" for
// Decision objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Decision) URI() string { return "decisions.cortex.cloud/v1alpha1" }
159+
// URI returns the fixed identifier "decisions.cortex.cloud/v1alpha1", the
// same value that Decision.URI returns.
func (*DecisionList) URI() string { return "decisions.cortex.cloud/v1alpha1" }
160+
158161
// init registers the Decision and DecisionList types with SchemeBuilder
// during package initialization.
func init() {
159162
SchemeBuilder.Register(&Decision{}, &DecisionList{})
160163
}

api/v1alpha1/descheduling_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ type DeschedulingList struct {
102102
Items []Descheduling `json:"items"`
103103
}
104104

105+
// URI returns the fixed identifier "deschedulings.cortex.cloud/v1alpha1" for
// Descheduling objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Descheduling) URI() string { return "deschedulings.cortex.cloud/v1alpha1" }
106+
// URI returns the fixed identifier "deschedulings.cortex.cloud/v1alpha1", the
// same value that Descheduling.URI returns.
func (*DeschedulingList) URI() string { return "deschedulings.cortex.cloud/v1alpha1" }
107+
105108
// init registers the Descheduling and DeschedulingList types with
// SchemeBuilder during package initialization.
func init() {
106109
SchemeBuilder.Register(&Descheduling{}, &DeschedulingList{})
107110
}

api/v1alpha1/knowledge_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ type KnowledgeList struct {
174174
Items []Knowledge `json:"items"`
175175
}
176176

177+
// URI returns the fixed identifier "knowledges.cortex.cloud/v1alpha1" for
// Knowledge objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Knowledge) URI() string { return "knowledges.cortex.cloud/v1alpha1" }
178+
// URI returns the fixed identifier "knowledges.cortex.cloud/v1alpha1", the
// same value that Knowledge.URI returns.
func (*KnowledgeList) URI() string { return "knowledges.cortex.cloud/v1alpha1" }
179+
177180
// init registers the Knowledge and KnowledgeList types with SchemeBuilder
// during package initialization.
func init() {
178181
SchemeBuilder.Register(&Knowledge{}, &KnowledgeList{})
179182
}

api/v1alpha1/kpi_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ type KPIList struct {
9797
Items []KPI `json:"items"`
9898
}
9999

100+
// URI returns the fixed identifier "kpis.cortex.cloud/v1alpha1" for KPI
// objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*KPI) URI() string { return "kpis.cortex.cloud/v1alpha1" }
101+
// URI returns the fixed identifier "kpis.cortex.cloud/v1alpha1", the same
// value that KPI.URI returns.
func (*KPIList) URI() string { return "kpis.cortex.cloud/v1alpha1" }
102+
100103
// init registers the KPI and KPIList types with SchemeBuilder during package
// initialization.
func init() {
101104
SchemeBuilder.Register(&KPI{}, &KPIList{})
102105
}

api/v1alpha1/pipeline_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ type PipelineList struct {
9898
Items []Pipeline `json:"items"`
9999
}
100100

101+
// URI returns the fixed identifier "pipelines.cortex.cloud/v1alpha1" for
// Pipeline objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Pipeline) URI() string { return "pipelines.cortex.cloud/v1alpha1" }
102+
// URI returns the fixed identifier "pipelines.cortex.cloud/v1alpha1", the
// same value that Pipeline.URI returns.
func (*PipelineList) URI() string { return "pipelines.cortex.cloud/v1alpha1" }
103+
101104
// init registers the Pipeline and PipelineList types with SchemeBuilder
// during package initialization.
func init() {
102105
SchemeBuilder.Register(&Pipeline{}, &PipelineList{})
103106
}

api/v1alpha1/reservation_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ type ReservationList struct {
9797
Items []Reservation `json:"items"`
9898
}
9999

100+
// URI returns the fixed identifier "reservations.cortex.cloud/v1alpha1" for
// Reservation objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Reservation) URI() string { return "reservations.cortex.cloud/v1alpha1" }
101+
// URI returns the fixed identifier "reservations.cortex.cloud/v1alpha1", the
// same value that Reservation.URI returns.
func (*ReservationList) URI() string { return "reservations.cortex.cloud/v1alpha1" }
102+
100103
// init registers the Reservation and ReservationList types with SchemeBuilder
// during package initialization.
func init() {
101104
SchemeBuilder.Register(&Reservation{}, &ReservationList{})
102105
}

api/v1alpha1/step_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ type StepList struct {
129129
Items []Step `json:"items"`
130130
}
131131

132+
// URI returns the fixed identifier "steps.cortex.cloud/v1alpha1" for Step
// objects.
// NOTE(review): presumably the key the multicluster client uses to pick the
// cluster for this resource — confirm in pkg/multicluster.
func (*Step) URI() string { return "steps.cortex.cloud/v1alpha1" }
133+
// URI returns the fixed identifier "steps.cortex.cloud/v1alpha1", the same
// value that Step.URI returns.
func (*StepList) URI() string { return "steps.cortex.cloud/v1alpha1" }
134+
132135
// init registers the Step and StepList types with SchemeBuilder during
// package initialization.
func init() {
133136
SchemeBuilder.Register(&Step{}, &StepList{})
134137
}

cmd/main.go

Lines changed: 59 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
ctrl "sigs.k8s.io/controller-runtime"
2323
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
2424
"sigs.k8s.io/controller-runtime/pkg/client"
25+
"sigs.k8s.io/controller-runtime/pkg/cluster"
2526
"sigs.k8s.io/controller-runtime/pkg/healthz"
2627
"sigs.k8s.io/controller-runtime/pkg/log/zap"
2728
"sigs.k8s.io/controller-runtime/pkg/metrics"
@@ -54,6 +55,7 @@ import (
5455
"github.com/cobaltcore-dev/cortex/pkg/conf"
5556
"github.com/cobaltcore-dev/cortex/pkg/db"
5657
"github.com/cobaltcore-dev/cortex/pkg/monitoring"
58+
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
5759
"github.com/sapcc/go-bits/httpext"
5860
"github.com/sapcc/go-bits/must"
5961
corev1 "k8s.io/api/core/v1"
@@ -243,6 +245,33 @@ func main() {
243245
os.Exit(1)
244246
}
245247

248+
homeCluster, err := cluster.New(restConfig, func(o *cluster.Options) { o.Scheme = scheme })
249+
if err != nil {
250+
setupLog.Error(err, "unable to create home cluster")
251+
os.Exit(1)
252+
}
253+
if err := mgr.Add(homeCluster); err != nil {
254+
setupLog.Error(err, "unable to add home cluster")
255+
os.Exit(1)
256+
}
257+
multiclusterClient := &multicluster.Client{
258+
HomeCluster: homeCluster,
259+
HomeRestConfig: restConfig,
260+
HomeScheme: scheme,
261+
}
262+
for _, override := range config.APIServerOverrides {
263+
cluster, err := multiclusterClient.AddRemote(override.Resource, override.Host, override.CACert)
264+
if err != nil {
265+
setupLog.Error(err, "unable to create cluster for apiserver override", "apiserver", override.Host)
266+
os.Exit(1)
267+
}
268+
// Also tell the manager about this cluster so that controllers can use it.
269+
if err := mgr.Add(cluster); err != nil {
270+
setupLog.Error(err, "unable to add cluster for apiserver override", "apiserver", override.Host)
271+
os.Exit(1)
272+
}
273+
}
274+
246275
// Our custom monitoring registry can add prometheus labels to all metrics.
247276
// This is useful to distinguish metrics from different deployments.
248277
metrics.Registry = monitoring.WrapRegistry(metrics.Registry, config.Monitoring)
@@ -265,14 +294,14 @@ func main() {
265294
Conf: config,
266295
}
267296
// Inferred through the base controller.
268-
decisionController.Client = mgr.GetClient()
297+
decisionController.Client = multiclusterClient
269298
decisionController.OperatorName = config.Operator
270-
if err := (decisionController).SetupWithManager(mgr); err != nil {
299+
if err := (decisionController).SetupWithManager(mgr, multiclusterClient); err != nil {
271300
setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler")
272301
os.Exit(1)
273302
}
274303
novashims.NewAPI(config, decisionController).Init(mux)
275-
go decisionsnova.CleanupNovaDecisionsRegularly(ctx, mgr.GetClient(), config)
304+
go decisionsnova.CleanupNovaDecisionsRegularly(ctx, multiclusterClient, config)
276305
}
277306
if slices.Contains(config.EnabledControllers, "nova-deschedulings-pipeline-controller") {
278307
// Deschedulings controller
@@ -284,18 +313,18 @@ func main() {
284313
CycleDetector: deschedulingnova.NewCycleDetector(),
285314
}
286315
// Inferred through the base controller.
287-
deschedulingsController.Client = mgr.GetClient()
316+
deschedulingsController.Client = multiclusterClient
288317
deschedulingsController.OperatorName = config.Operator
289-
if err := (deschedulingsController).SetupWithManager(mgr); err != nil {
318+
if err := (deschedulingsController).SetupWithManager(mgr, multiclusterClient); err != nil {
290319
setupLog.Error(err, "unable to create controller", "controller", "DeschedulingsReconciler")
291320
os.Exit(1)
292321
}
293322
go deschedulingsController.CreateDeschedulingsPeriodically(ctx)
294323
// Deschedulings cleanup on startup
295324
if err := (&deschedulingnova.Cleanup{
296-
Client: mgr.GetClient(),
325+
Client: multiclusterClient,
297326
Scheme: mgr.GetScheme(),
298-
}).SetupWithManager(mgr); err != nil {
327+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
299328
setupLog.Error(err, "unable to create controller", "controller", "Cleanup")
300329
os.Exit(1)
301330
}
@@ -306,39 +335,39 @@ func main() {
306335
Conf: config,
307336
}
308337
// Inferred through the base controller.
309-
controller.Client = mgr.GetClient()
338+
controller.Client = multiclusterClient
310339
controller.OperatorName = config.Operator
311-
if err := (controller).SetupWithManager(mgr); err != nil {
340+
if err := (controller).SetupWithManager(mgr, multiclusterClient); err != nil {
312341
setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler")
313342
os.Exit(1)
314343
}
315344
manilashims.NewAPI(config, controller).Init(mux)
316-
go decisionsmanila.CleanupManilaDecisionsRegularly(ctx, mgr.GetClient(), config)
345+
go decisionsmanila.CleanupManilaDecisionsRegularly(ctx, multiclusterClient, config)
317346
}
318347
if slices.Contains(config.EnabledControllers, "cinder-decisions-pipeline-controller") {
319348
controller := &decisionscinder.DecisionPipelineController{
320349
Monitor: pipelineMonitor,
321350
Conf: config,
322351
}
323352
// Inferred through the base controller.
324-
controller.Client = mgr.GetClient()
353+
controller.Client = multiclusterClient
325354
controller.OperatorName = config.Operator
326-
if err := (controller).SetupWithManager(mgr); err != nil {
355+
if err := (controller).SetupWithManager(mgr, multiclusterClient); err != nil {
327356
setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler")
328357
os.Exit(1)
329358
}
330359
cindershims.NewAPI(config, controller).Init(mux)
331-
go decisionscinder.CleanupCinderDecisionsRegularly(ctx, mgr.GetClient(), config)
360+
go decisionscinder.CleanupCinderDecisionsRegularly(ctx, multiclusterClient, config)
332361
}
333362
if slices.Contains(config.EnabledControllers, "ironcore-decisions-pipeline-controller") {
334363
controller := &decisionsmachines.DecisionPipelineController{
335364
Monitor: pipelineMonitor,
336365
Conf: config,
337366
}
338367
// Inferred through the base controller.
339-
controller.Client = mgr.GetClient()
368+
controller.Client = multiclusterClient
340369
controller.OperatorName = config.Operator
341-
if err := (controller).SetupWithManager(mgr); err != nil {
370+
if err := (controller).SetupWithManager(mgr, multiclusterClient); err != nil {
342371
setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler")
343372
os.Exit(1)
344373
}
@@ -347,23 +376,23 @@ func main() {
347376
// Setup a controller which will reconcile the history and explanation for
348377
// decision resources.
349378
explanationController := &explanation.Controller{
350-
Client: mgr.GetClient(),
379+
Client: multiclusterClient,
351380
OperatorName: config.Operator,
352381
}
353-
if err := explanationController.SetupWithManager(mgr); err != nil {
382+
if err := explanationController.SetupWithManager(mgr, multiclusterClient); err != nil {
354383
setupLog.Error(err, "unable to create controller", "controller", "ExplanationController")
355384
os.Exit(1)
356385
}
357386
}
358387
if slices.Contains(config.EnabledControllers, "reservations-controller") {
359-
monitor := reservationscontroller.NewControllerMonitor(mgr.GetClient())
388+
monitor := reservationscontroller.NewControllerMonitor(multiclusterClient)
360389
metrics.Registry.MustRegister(&monitor)
361390
if err := (&reservationscontroller.ReservationReconciler{
362-
Client: mgr.GetClient(),
391+
Client: multiclusterClient,
363392
Scheme: mgr.GetScheme(),
364393
Conf: config,
365394
HypervisorClient: reservationscontroller.NewHypervisorClient(),
366-
}).SetupWithManager(mgr); err != nil {
395+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
367396
setupLog.Error(err, "unable to create controller", "controller", "Reservation")
368397
os.Exit(1)
369398
}
@@ -372,20 +401,20 @@ func main() {
372401
monitor := datasources.NewMonitor()
373402
metrics.Registry.MustRegister(&monitor)
374403
if err := (&openstack.OpenStackDatasourceReconciler{
375-
Client: mgr.GetClient(),
404+
Client: multiclusterClient,
376405
Scheme: mgr.GetScheme(),
377406
Monitor: monitor,
378407
Conf: config,
379-
}).SetupWithManager(mgr); err != nil {
408+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
380409
setupLog.Error(err, "unable to create controller", "controller", "OpenStackDatasourceReconciler")
381410
os.Exit(1)
382411
}
383412
if err := (&prometheus.PrometheusDatasourceReconciler{
384-
Client: mgr.GetClient(),
413+
Client: multiclusterClient,
385414
Scheme: mgr.GetScheme(),
386415
Monitor: monitor,
387416
Conf: config,
388-
}).SetupWithManager(mgr); err != nil {
417+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
389418
setupLog.Error(err, "unable to create controller", "controller", "PrometheusDatasourceReconciler")
390419
os.Exit(1)
391420
}
@@ -394,29 +423,29 @@ func main() {
394423
monitor := extractor.NewMonitor()
395424
metrics.Registry.MustRegister(&monitor)
396425
if err := (&extractor.KnowledgeReconciler{
397-
Client: mgr.GetClient(),
426+
Client: multiclusterClient,
398427
Scheme: mgr.GetScheme(),
399428
Monitor: monitor,
400429
Conf: config,
401-
}).SetupWithManager(mgr); err != nil {
430+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
402431
setupLog.Error(err, "unable to create controller", "controller", "KnowledgeReconciler")
403432
os.Exit(1)
404433
}
405434
if err := (&extractor.TriggerReconciler{
406-
Client: mgr.GetClient(),
435+
Client: multiclusterClient,
407436
Scheme: mgr.GetScheme(),
408437
Conf: config,
409-
}).SetupWithManager(mgr); err != nil {
438+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
410439
setupLog.Error(err, "unable to create controller", "controller", "TriggerReconciler")
411440
os.Exit(1)
412441
}
413442
}
414443
if slices.Contains(config.EnabledControllers, "kpis-controller") {
415444
if err := (&kpis.Controller{
416-
Client: mgr.GetClient(),
445+
Client: multiclusterClient,
417446
SupportedKPIsByImpl: kpis.SupportedKPIsByImpl,
418447
OperatorName: config.Operator,
419-
}).SetupWithManager(mgr); err != nil {
448+
}).SetupWithManager(mgr, multiclusterClient); err != nil {
420449
setupLog.Error(err, "unable to create controller", "controller", "KPIController")
421450
os.Exit(1)
422451
}

0 commit comments

Comments
 (0)