diff --git a/ansible/adhoc/cudatests.yml b/ansible/adhoc/cudatests.yml
index f571f8a89..0f5cf78a0 100644
--- a/ansible/adhoc/cudatests.yml
+++ b/ansible/adhoc/cudatests.yml
@@ -2,8 +2,9 @@
 - hosts: cuda
   become: true
   gather_facts: true
-  tags: cuda_samples
+  tags: cuda_bandwidth
   tasks:
-    - ansible.builtin.import_role:
+    - name: Run CUDA bandwidth tasks
+      ansible.builtin.import_role:
         name: cuda
-        tasks_from: samples.yml
+        tasks_from: bandwidth.yml
diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml
index 19720309c..44368fd0e 100644
--- a/ansible/roles/cuda/defaults/main.yml
+++ b/ansible/roles/cuda/defaults/main.yml
@@ -21,3 +21,10 @@ cuda_samples_programs:
 # cuda_devices: # discovered from deviceQuery run
 cuda_persistenced_state: started
 cuda_install_nvidiafabricmanger: false
+# Variables for nvbandwidth (used by the bandwidth.yml tasks run from cudatests.yml).
+# Release tag of NVIDIA/nvbandwidth to fetch, without the leading "v".
+cuda_bandwidth_version: '0.8'
+# Directory the release is unpacked into and built in.
+# NOTE(review): ansible_user is not a gathered fact - confirm it is always defined.
+cuda_bandwidth_path: "/var/lib/{{ ansible_user }}/cuda_bandwidth"
+cuda_bandwidth_release_url: "https://github.com/NVIDIA/nvbandwidth/archive/refs/tags/v{{ cuda_bandwidth_version }}.tar.gz"
diff --git a/ansible/roles/cuda/tasks/bandwidth.yml b/ansible/roles/cuda/tasks/bandwidth.yml
new file mode 100644
index 000000000..58f57dd73
--- /dev/null
+++ b/ansible/roles/cuda/tasks/bandwidth.yml
@@ -0,0 +1,71 @@
+---
+# Download, build and run NVIDIA's nvbandwidth tool on each target host, then
+# save its output under the environment's cudatests directory on the controller.
+
+- name: Ensure CUDA bandwidth path exists
+  ansible.builtin.file:
+    state: directory
+    path: "{{ cuda_bandwidth_path }}"
+    owner: "{{ ansible_user }}"
+    group: "{{ ansible_user }}"
+    mode: "0755"
+
+# `creates` makes this a no-op once the release has been unpacked.
+- name: Download CUDA bandwidth test release
+  ansible.builtin.unarchive:
+    remote_src: true
+    src: "{{ cuda_bandwidth_release_url }}"
+    dest: "{{ cuda_bandwidth_path }}"
+    owner: "{{ ansible_user }}"
+    group: "{{ ansible_user }}"
+    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}"
+
+- name: Create CUDA bandwidth test build directory
+  ansible.builtin.file:
+    state: directory
+    path: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
+    mode: "0755"
+
+# Results are written on the Ansible controller, hence delegate_to: localhost.
+- name: Ensure cudatests directory exists
+  ansible.builtin.file:
+    path: "{{ appliances_environment_root }}/cudatests"
+    state: directory
+    mode: "0755"
+  delegate_to: localhost
+
+# The build environment and Boost are loaded from EESSI modules. `source` is a
+# bash builtin and the init script is the bash variant, so pin the interpreter
+# instead of relying on whatever /bin/sh happens to be.
+# NOTE(review): /etc/profile.d/sh.local presumably sets site-local environment
+# (e.g. CUDA paths) - confirm.
+- name: Build CUDA bandwidth test
+  ansible.builtin.shell:
+    cmd: >-
+      source /cvmfs/software.eessi.io/versions/2023.06/init/bash &&
+      module load buildenv/default-foss-2023b &&
+      module load Boost/1.82.0-GCC-12.3.0 &&
+      . /etc/profile.d/sh.local &&
+      cmake .. &&
+      make -j {{ ansible_processor_vcpus }}
+    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
+    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/nvbandwidth"
+    executable: /bin/bash
+
+# Nothing here needs shell features, so use command rather than shell.
+# changed_when stays true: the benchmark is re-run on every invocation.
+- name: Run CUDA bandwidth test
+  ansible.builtin.command:
+    cmd: ./nvbandwidth
+    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/"
+  register: cuda_bandwidth_output
+  changed_when: true
+
+# NOTE(review): dest has no separator between the hostname and
+# "bandwidth_results.txt" - confirm whether that is intended.
+- name: Save CUDA bandwidth output to bandwidth_results.txt
+  ansible.builtin.copy:
+    content: "{{ cuda_bandwidth_output.stdout }}"
+    dest: "{{ appliances_environment_root }}/cudatests/{{ inventory_hostname }}bandwidth_results.txt"
+    mode: "0644"
+  delegate_to: localhost