@@ -20,6 +20,7 @@ import (
2020 "bufio"
2121 "context"
2222 "encoding/json"
23+ "errors"
2324 "fmt"
2425 "os"
2526 "sort"
@@ -67,15 +68,13 @@ type GPUConfigProfile struct {
6768
6869func (s * E2ESuite ) addRemoveNodeLabels (nodeName string , selectedProfile string ) {
6970 err := utils .AddNodeLabel (s .clientSet , nodeName , "dcm.amd.com/gpu-config-profile" , selectedProfile )
70- _ = utils .AddNodeLabel (s .clientSet , nodeName , "dcm.amd.com/apply-gpu-config-profile" , "apply" )
7171 if err != nil {
7272 logger .Infof ("Error adding node lbels: %s\n " , err .Error ())
7373 return
7474 }
75- time .Sleep (45 * time .Second )
75+ time .Sleep (15 * time .Second )
7676 // Allow partition to happen
7777 err = utils .DeleteNodeLabel (s .clientSet , nodeName , "dcm.amd.com/gpu-config-profile" )
78- _ = utils .DeleteNodeLabel (s .clientSet , nodeName , "dcm.amd.com/apply-gpu-config-profile" )
7978 if err != nil {
8079 logger .Infof ("Error removing node lbels: %s\n " , err .Error ())
8180 return
@@ -118,6 +117,7 @@ func (s *E2ESuite) GetPodName(ns string) (string, error) {
118117func (s * E2ESuite ) GetLatestEvents (ns string ) ([]corev1.Event , error ) {
119118
120119 dsName := s .cfgName + "-device-config-manager"
120+ logger .Infof ("dsName: %s\n " , dsName )
121121 fieldSelector := fields.Set {
122122 "involvedObject.name" : dsName ,
123123 }.AsSelector ().String ()
@@ -218,8 +218,9 @@ func (s *E2ESuite) createConfigMap() GPUConfigProfiles {
218218
219219 profiles_set1 := []* ProfileConfig {
220220 {
221- ComputePartition : "SPX " ,
221+ ComputePartition : "DPX " ,
222222 MemoryPartition : "NPS1" ,
223+ NumGPUsAssigned : 1 ,
223224 },
224225 }
225226
@@ -406,18 +407,20 @@ func (s *E2ESuite) TestDCMConfigMapPartitionHomogenous(c *C) {
406407 s .configMapHelper (c )
407408 // Trigger partition using labels
408409 logger .Infof ("Add node label after pod comes up" )
409- time .Sleep (30 * time .Second )
410+ time .Sleep (5 * time .Second )
410411
411412 nodeName := s .getWorkerNode (c )
412413 logger .Infof ("NODE NAME %v" , nodeName )
413414
414415 s .addRemoveNodeLabels (nodeName , "default" )
415416
417+ logger .Infof ("Getting pod logs" )
416418 logs := s .getLogs ()
417- if strings .Contains (logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" )) {
419+
420+ if strings .Contains (logs , "Partition not required" ) || (strings .Contains (logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" ))) {
418421 logger .Infof ("Successfully tested homogenous default partitioning" )
419422 } else {
420- logger . Errorf ( "Failure test homogenous partitioning" )
423+ assert . NoError ( c , errors . New ( "Test case Failed" ), "failure test homogenous partitioning" )
421424 }
422425}
423426
@@ -432,18 +435,19 @@ func (s *E2ESuite) TestDCMConfigMapPartitionHeterogenous(c *C) {
432435 s .configMapHelper (c )
433436 // Trigger partition using labels
434437 logger .Infof ("Add node label after pod comes up" )
435- time .Sleep (30 * time .Second )
438+ time .Sleep (5 * time .Second )
436439
437440 nodeName := s .getWorkerNode (c )
438441 logger .Infof ("NODE NAME %v" , nodeName )
439442
440443 s .addRemoveNodeLabels (nodeName , "e2e_profile1" )
441444
445+ logger .Infof ("Getting pod logs" )
442446 logs := s .getLogs ()
443- if strings .Contains (logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" )) {
447+ if strings .Contains (logs , "Partition not required" ) || ( strings . Contains ( logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" ) )) {
444448 logger .Infof ("Successfully tested heterogenous partitioning" )
445449 } else {
446- logger . Errorf ( "Failure test heterogenous partitioning" )
450+ assert . NoError ( c , errors . New ( "Test case Failed" ), "failure test heterogenous partitioning" )
447451 }
448452}
449453
@@ -458,131 +462,98 @@ func (s *E2ESuite) TestDCMPartitionNPS4(c *C) {
458462 s .configMapHelper (c )
459463 // Trigger partition using labels
460464 logger .Infof ("Add node label after pod comes up" )
461- time .Sleep (30 * time .Second )
465+ time .Sleep (5 * time .Second )
462466
463467 nodeName := s .getWorkerNode (c )
464468 logger .Infof ("NODE NAME %v" , nodeName )
465469
466470 s .addRemoveNodeLabels (nodeName , "e2e_profile2" )
467- time .Sleep (30 * time .Second )
468471
472+ logger .Infof ("Getting pod logs" )
469473 logs := s .getLogs ()
470- if strings .Contains (logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" )) {
474+ if strings .Contains (logs , "Partition not required" ) || ( strings . Contains ( logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" ) )) {
471475 logger .Infof ("Successfully tested NPS4 partitioning" )
472476 } else {
473- logger . Errorf ( "Failure test NPS4 partitioning" )
477+ assert . NoError ( c , errors . New ( "Test case Failed" ), "failure test NPS4 partitioning" )
474478 }
475479}
476480
// TestDCMInvalidComputeType labels a node with the "inval_prof1" profile and
// then checks that the collected logs contain "Invalid partition types" and
// that eventHelper reports a matching "Warning" event.
//
// This span is a diff rendering: lines marked "+" belong to the new revision,
// lines marked "-" to the old one, unmarked lines are shared context.
// The new revision adds the s.simEnable skip, shortens the wait from 30s to 5s,
// looks for the "InvalidProfileInfo" event instead of "InvalidComputeType", and
// reports failure through assert.NoError instead of logger.Errorf.
477481func (s * E2ESuite ) TestDCMInvalidComputeType (c * C ) {
482+ if s .simEnable {
483+ c .Skip ("Skipping for non amd gpu testbed" )
484+ }
478485 if ! dcmImageDefined {
479486 c .Skip ("skip DCM test because E2E_DCM_IMAGE is not defined" )
480487 }
// NOTE(review): this Skip is unconditional; assuming c.Skip stops the test
// (gocheck semantics - confirm), nothing below currently executes.
481488 c .Skip ("Skipping DCM Partition test for now, enable after fixing the test" )
482489 s .configMapHelper (c )
483490 // Trigger partition using labels
484491 logger .Infof ("Add node label after pod comes up" )
485- time .Sleep (30 * time .Second )
492+ time .Sleep (5 * time .Second )
486493
487494 nodeName := s .getWorkerNode (c )
488495 logger .Infof ("NODE NAME %v" , nodeName )
489496
// addRemoveNodeLabels sets the dcm.amd.com/gpu-config-profile label to the
// given profile, sleeps, and then deletes the label again.
490497 s .addRemoveNodeLabels (nodeName , "inval_prof1" )
491498
499+ logger .Infof ("Getting pod logs" )
492500 logs := s .getLogs ()
// Pass only if the log text AND the event check both hold.
493- if strings .Contains (logs , "Invalid partition types" ) && (s .eventHelper ("InvalidComputeType " , "Warning" )) {
501+ if strings .Contains (logs , "Invalid partition types" ) && (s .eventHelper ("InvalidProfileInfo " , "Warning" )) {
494502 logger .Infof ("Successfully tested invalid compute type profile" )
495503 } else {
// NoError is handed a freshly created, always non-nil error, so reaching
// this branch is meant to register an assertion failure on c.
496- logger . Errorf ( "Failure testing invalid compute type" )
504+ assert . NoError ( c , errors . New ( "Test case Failed" ), "failure testing invalid compute type" )
497505 }
498506}
499507
// TestDCMInvalidMemoryType labels a node with the "inval_prof2" profile and
// then checks that the collected logs contain "Invalid partition types" and
// that eventHelper reports a matching "Warning" event.
//
// This span is a diff rendering: lines marked "+" belong to the new revision,
// lines marked "-" to the old one, unmarked lines are shared context.
// The new revision adds the s.simEnable skip, shortens the wait from 30s to 5s,
// looks for the "InvalidProfileInfo" event instead of "InvalidMemoryType", and
// reports failure through assert.NoError instead of logger.Errorf.
500508func (s * E2ESuite ) TestDCMInvalidMemoryType (c * C ) {
509+ if s .simEnable {
510+ c .Skip ("Skipping for non amd gpu testbed" )
511+ }
501512 if ! dcmImageDefined {
502513 c .Skip ("skip DCM test because E2E_DCM_IMAGE is not defined" )
503514 }
// NOTE(review): this Skip is unconditional; assuming c.Skip stops the test
// (gocheck semantics - confirm), nothing below currently executes.
504515 c .Skip ("Skipping DCM Partition test for now, enable after fixing the test" )
505516 s .configMapHelper (c )
506517 // Trigger partition using labels
507518 logger .Infof ("Add node label after pod comes up" )
508- time .Sleep (30 * time .Second )
519+ time .Sleep (5 * time .Second )
509520
510521 nodeName := s .getWorkerNode (c )
511522 logger .Infof ("NODE NAME %v" , nodeName )
512523
// addRemoveNodeLabels sets the dcm.amd.com/gpu-config-profile label to the
// given profile, sleeps, and then deletes the label again.
513524 s .addRemoveNodeLabels (nodeName , "inval_prof2" )
514525
526+ logger .Infof ("Getting pod logs" )
515527 logs := s .getLogs ()
// Pass only if the log text AND the event check both hold.
516- if strings .Contains (logs , "Invalid partition types" ) && (s .eventHelper ("InvalidMemoryType " , "Warning" )) {
528+ if strings .Contains (logs , "Invalid partition types" ) && (s .eventHelper ("InvalidProfileInfo " , "Warning" )) {
517529 logger .Infof ("Successfully tested invalid memory type profile" )
518530 } else {
// NoError is handed a freshly created, always non-nil error, so reaching
// this branch is meant to register an assertion failure on c.
519- logger . Errorf ( "Failure testing invalid memory type" )
531+ assert . NoError ( c , errors . New ( "Test case Failed" ), "failure testing invalid memory type" )
520532 }
521533}
522534
// TestDCMInvalidGPUFilter labels a node with the "inval_prof3" profile and, in
// the new revision, passes solely on eventHelper("InvalidProfileInfo",
// "Warning"); the old revision additionally required the logs to contain
// "exceeding the total number" and "ERROR".
//
// This span is a diff rendering: lines marked "+" belong to the new revision,
// lines marked "-" to the old one, unmarked lines are shared context. The long
// run of "-" lines in the middle is the old TestDCMDefaultPartition, which this
// change deletes; the closing lines of that deleted function are reused as the
// closing lines of this one.
523536func (s * E2ESuite ) TestDCMInvalidGPUFilter (c * C ) {
536+ if s .simEnable {
537+ c .Skip ("Skipping for non amd gpu testbed" )
538+ }
524539 if ! dcmImageDefined {
525540 c .Skip ("skip DCM test because E2E_DCM_IMAGE is not defined" )
526541 }
// NOTE(review): this Skip is unconditional; assuming c.Skip stops the test
// (gocheck semantics - confirm), nothing below currently executes.
527542 c .Skip ("Skipping DCM Partition test for now, enable after fixing the test" )
528543 s .configMapHelper (c )
529544 // Trigger partition using labels
530545 logger .Infof ("Add node label after pod comes up" )
531- time .Sleep (30 * time .Second )
546+ time .Sleep (5 * time .Second )
532547
533548 nodeName := s .getWorkerNode (c )
534549 logger .Infof ("NODE NAME %v" , nodeName )
535550
// addRemoveNodeLabels sets the dcm.amd.com/gpu-config-profile label to the
// given profile, sleeps, and then deletes the label again.
536551 s .addRemoveNodeLabels (nodeName , "inval_prof3" )
537552
538- logs := s .getLogs ()
539- if strings .Contains (logs , "exceeding the total number" ) && strings .Contains (logs , "ERROR" ) && (s .eventHelper ("InvalidProfileInfo" , "Warning" )) {
553+ if s .eventHelper ("InvalidProfileInfo" , "Warning" ) {
540554 logger .Infof ("Successfully tested invalid GPU filter profile" )
541555 } else {
542- logger .Errorf ("Failure testing invalid GPU filter profile" )
543- }
544- }
545-
// Removed by this change: the whole TestDCMDefaultPartition test, which applied
// only the dcm.amd.com/apply-gpu-config-profile label.
546- func (s * E2ESuite ) TestDCMDefaultPartition (c * C ) {
547- if s .simEnable {
548- c .Skip ("Skipping for non amd gpu testbed" )
549- }
550- if ! dcmImageDefined {
551- c .Skip ("skip DCM test because E2E_DCM_IMAGE is not defined" )
552- }
553- logger .Infof ("###BEGIN TESTCASE###\n " )
554- // check to see existing deviceconfig DS pods
555- _ , err := s .dClient .DeviceConfigs (s .ns ).Get (s .cfgName , metav1.GetOptions {})
556- assert .Errorf (c , err , fmt .Sprintf ("config %v exists" , s .cfgName ))
557-
558- // fetch the CR
559- devCfg := s .getDeviceConfigForDCM (c )
560- logger .Infof ("create device-config %+v" , devCfg .Spec .ConfigManager )
561- s .createDeviceConfig (devCfg , c )
562-
563- s .checkDeviceConfigManagerStatus (devCfg , s .ns , c )
564- logger .Infof ("SUCCESSFULLY DEPLOYED DCM DAEMONSET" )
565- time .Sleep (30 * time .Second )
566-
567- nodeName := s .getWorkerNode (c )
568- err = utils .AddNodeLabel (s .clientSet , nodeName , "dcm.amd.com/apply-gpu-config-profile" , "apply" )
569- if err != nil {
570- logger .Infof ("Error adding node lbels: %s\n " , err .Error ())
571- return
572- }
573- time .Sleep (15 * time .Second )
574- // Allow partition to happen
575- err = utils .DeleteNodeLabel (s .clientSet , nodeName , "dcm.amd.com/apply-gpu-config-profile" )
576- if err != nil {
577- logger .Infof ("Error removing node lbels: %s\n " , err .Error ())
578- return
579- }
580-
581- logs := s .getLogs ()
582- if strings .Contains (logs , "Partition completed successfully" ) && (! strings .Contains (logs , "ERROR" )) && (s .eventHelper ("SuccessfullyPartitioned" , "Normal" )) {
583- logger .Infof ("Successfully tested default partitioning" )
584- } else {
585- logger .Errorf ("Failure testing default partitioning" )
// NoError is handed a freshly created, always non-nil error, so reaching this
// branch is meant to register an assertion failure on c.
// NOTE(review): "GPdvsdU" in the message below looks like a typo for "GPU".
556+ assert .NoError (c , errors .New ("Test case Failed" ), "failure testing invalid GPdvsdU filter profile" )
586557 }
587558}
588559
0 commit comments