diff --git a/tests/lib/Qemu.py b/tests/lib/Qemu.py index 0c637a9a..a9a7edfd 100644 --- a/tests/lib/Qemu.py +++ b/tests/lib/Qemu.py @@ -629,33 +629,45 @@ def run_and_wait(self): self.run() QemuMonitor(self) - def communicate(self): + def communicate(self, timeout=60): """ Wait for qemu to exit """ - self.out, self.err = self.proc.communicate(timeout=60) + self.out, self.err = self.proc.communicate(timeout=timeout) if self.proc.returncode != 0: print(self.err.decode()) return self.out, self.err - def stop(self): + def shutdown(self): """ - Stop qemu process + Send shutdown command to the VM + Do not wait for the VM to exit + Return False if the VM is already terminated """ if self.proc is None: - return + return False if self.proc.returncode is not None: - return + return False + + try: + mon = QemuMonitor(self) + mon.powerdown() + except Exception as e: + pass - # self.proc.returncode == None -> not yet terminated + return True + + def stop(self): + """ + Stop qemu process + """ + if not self.shutdown(): + return try: # try to shutdown the VM properly, this is important to avoid # rootfs corruption if we want to run the guest again # catch exception and ignore it since we are stopping .... no need to fail the test - mon = QemuMonitor(self) - mon.powerdown() - self.communicate() return except Exception as e: diff --git a/tests/lib/util.py b/tests/lib/util.py index 42825bae..6e15fc87 100644 --- a/tests/lib/util.py +++ b/tests/lib/util.py @@ -45,6 +45,16 @@ def tcp_port_available(): return port def get_max_td_vms(): + """ + MKTME encryption engine is used both for legacy MKTME operation and TDX operation + The key space is partitioned in 3 ranges: + - first key + - shared keys + - TDX keys + So if we have 128 keys and we decide to split this range into 2 equal sets (in BIOS) + TDX key space will only have 63 keys instead of 64. 
+ The number of TDX keys can be read from the IA32_MKTME_KEYID_PARTITIONING MSR (0x87) + """ cmd = ['rdmsr', '0x87'] rc = subprocess.run(cmd, capture_output=True) assert rc.returncode == 0, "Failed getting max td vms" diff --git a/tests/tests/stress/test_stress_resources.py b/tests/tests/stress/test_stress_resources.py index 2f26186d..9b5052c2 100644 --- a/tests/tests/stress/test_stress_resources.py +++ b/tests/tests/stress/test_stress_resources.py @@ -17,6 +17,7 @@ import subprocess import time import multiprocessing +import pytest import Qemu import util @@ -70,15 +71,47 @@ def test_stress_max_vcpus(qm): qm.stop() +def check_qemu_fail_to_start(qm, error_msg=None): + try: + _, err = qm.communicate(timeout=5) + except: + # if timeout, that means the QEMU is running fine + # try to connect with ssh to make sure the TD is running fine + try: + ssh = Qemu.QemuSSH(qm) + except: + # the qemu is running but we cannot connect to SSH + # we consider that the check is OK + qm.stop() + return + pytest.fail('The TD is running !') + if error_msg: + assert error_msg in err.decode() def test_stress_max_guests(): """ Test max guests (No Intel Case ID) + + There is a limit on the number of TDs that can be run in parallel. + This limit can be due to several factors, but the most prevalent factor + is the number of keys the CPU can allocate to TDs. + In fact, TDX takes advantage of an existing CPU feature called MK-TME + (Multi-key Total Memory Encryption) to encrypt the VM memory. It enables + the CPU to encrypt each TD’s memory with a unique Advanced Encryption Standard (AES) key. + MK-TME offers a number of keys and this key space is partitioned into 2 sets: + Shared (VMM) and Private (TDX). The number of keys in the Private space defines the + maximum number of TDs we can run in parallel. + + This test verifies that we can run TDs up to this limit and any new TD creation + is refused by qemu in a nice way. 
""" # get max number of TD VMs we can create (max - current) max_td_vms = util.get_max_td_vms() - util.get_current_td_vms() assert max_td_vms > 0, "No available space for TD VMs" + + print(f'The limit number of TDs is : {max_td_vms}') + qm = [None] * max_td_vms # initialize machines @@ -95,8 +128,21 @@ def test_stress_max_guests(): print("Waiting for machine %d" % (i)) ssh = Qemu.QemuSSH(qm[i]) + # try to run a new TD + # expect qemu quit immediately with a specific error message + with Qemu.QemuMachine() as one_more: + one_more.run() + check_qemu_fail_to_start(one_more, error_msg="KVM_TDX_INIT_VM failed: No space left on device") + # stop all machines for i in range(max_td_vms): print("Stopping machine %d" % (i)) - qm[i].stop() + qm[i].shutdown() + # wait for all machines to exit + for i in range(max_td_vms): + print("Stopping machine %d" % (i)) + try: + qm[i].communicate() + except: + pass