2828//! - `IbvWc`: Wrapper around ibverbs work completion structure, used to track the status of RDMA operations. 
2929use  std:: ffi:: CStr ; 
3030use  std:: fmt; 
31+ use  std:: sync:: OnceLock ; 
3132
3233use  hyperactor:: Named ; 
3334use  serde:: Deserialize ; 
@@ -168,7 +169,7 @@ impl IbverbsConfig {
168169/// 
169170/// Device targets use a unified "type:id" format: 
170171/// - "cpu:N" -> finds RDMA device closest to NUMA node N 
171- /// - "cuda:N" -> finds RDMA device closest to CUDA device N    
172+ /// - "cuda:N" -> finds RDMA device closest to CUDA device N 
172173/// - "nic:mlx5_N" -> returns the specified NIC directly 
173174/// 
174175/// Shortcuts: 
@@ -630,16 +631,81 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
630631    devices
631632} 
632633
634+ /// Cached result of mlx5dv support check. 
635+ static  MLX5DV_SUPPORTED_CACHE :  OnceLock < bool >  = OnceLock :: new ( ) ; 
636+ 
637+ /// Checks if mlx5dv (Mellanox device-specific verbs extension) is supported. 
638+ /// 
639+ /// This function attempts to open the first available RDMA device and check if 
640+ /// mlx5dv extensions can be initialized. The mlx5dv extensions are required for 
641+ /// advanced features like GPU Direct RDMA and direct queue pair manipulation. 
642+ /// 
643+ /// The result is cached after the first call, making subsequent calls essentially free. 
644+ /// 
645+ /// # Returns 
646+ /// 
647+ /// `true` if mlx5dv extensions are supported, `false` otherwise. 
648+ pub  fn  mlx5dv_supported ( )  -> bool  { 
649+     * MLX5DV_SUPPORTED_CACHE . get_or_init ( mlx5dv_supported_impl) 
650+ } 
651+ 
652+ fn  mlx5dv_supported_impl ( )  -> bool  { 
653+     // SAFETY: We are calling C functions from libibverbs and libmlx5. 
654+     unsafe  { 
655+         let  mut  num_devices = 0 ; 
656+         let  device_list = rdmaxcel_sys:: ibv_get_device_list ( & mut  num_devices) ; 
657+ 
658+         // Compute result in a block, ensuring cleanup happens afterward 
659+         let  result = { 
660+             if  device_list. is_null ( )  || num_devices == 0  { 
661+                 false 
662+             }  else  { 
663+                 // Try to open the first device and check mlx5dv support 
664+                 let  device = * device_list; 
665+                 let  mut  mlx5dv_supported = false ; 
666+ 
667+                 if  !device. is_null ( )  { 
668+                     let  context = rdmaxcel_sys:: ibv_open_device ( device) ; 
669+                     if  !context. is_null ( )  { 
670+                         // Try to query device capabilities with mlx5dv 
671+                         let  mut  attrs_out = rdmaxcel_sys:: mlx5dv_context:: default ( ) ; 
672+ 
673+                         // mlx5dv_query_device returns 0 on success 
674+                         if  rdmaxcel_sys:: mlx5dv_query_device ( context,  & mut  attrs_out)  == 0  { 
675+                             mlx5dv_supported = true ; 
676+                         } 
677+ 
678+                         rdmaxcel_sys:: ibv_close_device ( context) ; 
679+                     } 
680+                 } 
681+                 mlx5dv_supported
682+             } 
683+         } ; 
684+ 
685+         rdmaxcel_sys:: ibv_free_device_list ( device_list) ; 
686+         result
687+     } 
688+ } 
689+ 
690+ /// Cached result of ibverbs support check. 
691+ static  IBVERBS_SUPPORTED_CACHE :  OnceLock < bool >  = OnceLock :: new ( ) ; 
692+ 
633693/// Checks if ibverbs devices can be retrieved successfully. 
634694/// 
635695/// This function attempts to retrieve the list of RDMA devices using the 
636696/// `ibv_get_device_list` function from the ibverbs library. It returns `true` 
637697/// if devices are found, and `false` otherwise. 
638698/// 
699+ /// The result is cached after the first call, making subsequent calls essentially free. 
700+ /// 
639701/// # Returns 
640702/// 
641703/// `true` if devices are successfully retrieved, `false` otherwise. 
642704pub  fn  ibverbs_supported ( )  -> bool  { 
705+     * IBVERBS_SUPPORTED_CACHE . get_or_init ( ibverbs_supported_impl) 
706+ } 
707+ 
708+ fn  ibverbs_supported_impl ( )  -> bool  { 
643709    // SAFETY: We are calling a C function from libibverbs. 
644710    unsafe  { 
645711        let  mut  num_devices = 0 ; 
@@ -651,6 +717,25 @@ pub fn ibverbs_supported() -> bool {
651717    } 
652718} 
653719
720+ /// Checks if RDMA is fully supported on this system. 
721+ /// 
722+ /// This is the canonical function to check if RDMA can be used. It verifies both: 
723+ /// 1. Basic ibverbs device availability (`ibverbs_supported()`) 
724+ /// 2. mlx5dv device-specific extensions (`mlx5dv_supported()`) 
725+ /// 
726+ /// mlx5dv extensions are required for this library's advanced features including 
727+ /// GPU Direct RDMA and direct queue pair manipulation. Systems with non-Mellanox 
728+ /// RDMA devices will have `ibverbs_supported() == true` but `rdma_supported() == false`. 
729+ /// 
730+ /// The result is cached after the first call, making subsequent calls essentially free. 
731+ /// 
732+ /// # Returns 
733+ /// 
734+ /// `true` if both ibverbs devices and mlx5dv extensions are available, `false` otherwise. 
735+ pub  fn  rdma_supported ( )  -> bool  { 
736+     ibverbs_supported ( )  && mlx5dv_supported ( ) 
737+ } 
738+ 
654739/// Represents a view of a memory region that can be registered with an RDMA device. 
655740/// 
656741/// This is a 'view' of a registered Memory Region, allowing multiple views into a single 
@@ -1016,4 +1101,31 @@ mod tests {
10161101        let  formatted = format_gid ( & gid) ; 
10171102        assert_eq ! ( formatted,  "1234:5678:9abc:def0:1122:3344:5566:7788" ) ; 
10181103    } 
1104+ 
#[test]
fn test_mlx5dv_supported_basic() {
    // Smoke test: either answer is acceptable, the call just must not panic.
    println!("mlx5dv_supported: {}", mlx5dv_supported());
}
1111+ 
#[test]
fn test_rdma_supported_combines_checks() {
    // Gather the two component answers and the combined answer, in that order.
    let (ibverbs_support, mlx5dv_support) = (ibverbs_supported(), mlx5dv_supported());
    let rdma_support = rdma_supported();

    // The combined check must be the logical AND of its two components.
    let expected = ibverbs_support && mlx5dv_support;
    assert_eq!(
        rdma_support, expected,
        "rdma_supported should equal (ibverbs_supported && mlx5dv_supported)"
    );

    println!(
        "ibverbs_supported: {}, mlx5dv_supported: {}, rdma_supported: {}",
        ibverbs_support, mlx5dv_support, rdma_support
    );
}
10191131} 
0 commit comments