Skip to content

Commit 31a75a4

Browse files
Add major channel configuration in the prolog to configure IMEX
This configuration follows the [NVIDIA documentation](https://docs.nvidia.com/multi-node-nvlink-systems/imex-guide/imexchannels.html). It is only suitable for single-user environments and is not compatible with multi-user environments. Co-authored-by: Himani Anil Deshpande <[email protected]>
1 parent 62d2a40 commit 31a75a4

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

tests/integration-tests/tests/ultraserver/test_gb200/test_gb200/91_nvidia_imex_prolog.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,32 @@ function reload_imex() {
141141
timeout ${IMEX_START_TIMEOUT} systemctl start ${IMEX_SERVICE}
142142
}
143143

144+
function create_default_imex_channel() {
    # Ensure the default IMEX channel device node (channel0) exists.
    #
    # Follows the NVIDIA IMEX guide:
    #   https://docs.nvidia.com/multi-node-nvlink-systems/imex-guide/imexchannels.html
    # This configuration is only suitable for single-user environments; it is not
    # compatible with multi-user environments.
    #
    # Globals written:
    #   MAJOR_NUMBER - major number registered for nvidia-caps-imex-channels in
    #                  /proc/devices (empty when the device is not registered).
    #                  Kept global, as before, in case other code reads it.
    # Returns:
    #   0 if channel0 already exists or was created, 1 if it could not be created.
    local channel_dir="/dev/nvidia-caps-imex-channels"
    local channel_node="${channel_dir}/channel0"

    info "Creating IMEX default Channel"

    # /proc/devices right-aligns the major number, so a space-delimited `cut -f1`
    # yields an empty field when the number is padded. Split on whitespace and
    # match the device name exactly instead of as a substring.
    MAJOR_NUMBER=$(awk '$2 == "nvidia-caps-imex-channels" { print $1; exit }' /proc/devices 2>/dev/null) || MAJOR_NUMBER=""

    if [ ! -d "${channel_dir}" ]; then
        # -p tolerates the directory being created concurrently.
        if ! sudo mkdir -p "${channel_dir}"; then
            info "ERROR: failed to create ${channel_dir}"
            return 1
        fi
    fi

    # Then check and create device node
    if [ -e "${channel_node}" ]; then
        info "IMEX default Channel already exists"
        return 0
    fi

    # Without a major number mknod cannot succeed; report it instead of
    # logging a misleading success message.
    if [ -z "${MAJOR_NUMBER}" ]; then
        info "ERROR: nvidia-caps-imex-channels not found in /proc/devices; cannot create ${channel_node}"
        return 1
    fi

    if ! sudo mknod "${channel_node}" c "${MAJOR_NUMBER}" 0; then
        info "ERROR: mknod failed for ${channel_node} (major ${MAJOR_NUMBER})"
        return 1
    fi

    info "IMEX default Channel created"
}
162+
144163
{
145164
info "PROLOG Start JobId=${SLURM_JOB_ID}: $0"
146165

147166
return_unless_gb200_with_imex
148167

168+
create_default_imex_channel
169+
149170
QUEUE_NAME=$(get_dna_parameter "scheduler_queue_name")
150171
COMPUTE_RESOURCE_NAME=$(get_dna_parameter "scheduler_compute_resource_name")
151172
LAUNCH_TEMPLATE_ID=$(get_dna_parameter "launch_template_id")

0 commit comments

Comments
 (0)