110 changes: 97 additions & 13 deletions benchmark/librbdfio.py
@@ -9,6 +9,7 @@
import settings
import monitoring

from typing import Optional
from .benchmark import Benchmark

logger = logging.getLogger("cbt")
@@ -28,6 +29,8 @@ def __init__(self, archive_dir, cluster, config):
self.recov_test_type = config.get('recov_test_type', 'blocking')
self.data_pool_profile = config.get('data_pool_profile', None)
self.time = config.get('time', None)
self.precond_time = config.get('precond_time', None)
Contributor:

If you look at the way workloads work, they basically temporarily overwrite the global FIO options defined in the test plan .yaml (normally located before the workloads section). So within a precondition step one can set a time value that affects only the precondition and is then restored to the global value; another workload step would do the same.

In other words, there is no actual need to introduce yet another 'precond_time'!

But you can do the experiment: define two different time values, one in a precond step and one in a workload step, both different from the global one, with the existing code (that is, without this change), to convince yourself.
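
A minimal sketch of the override behaviour described above (illustrative only; the dictionaries, option names and values here are assumptions, not CBT's actual implementation):

# Global FIO options defined at the top of the test plan.
global_opts = {'time': 300, 'op_size': 4194304}

# A precondition step carrying its own 'time'; merging it over the globals
# yields the effective options for that step only.
precond_step = {'precond': True, 'time': 600}
effective = {**global_opts, **precond_step}    # time == 600 during preconditioning

# The next workload step omits 'time', so the global value applies again.
workload_step = {'mode': 'randrw', 'rwmixread': 70}
effective = {**global_opts, **workload_step}   # time == 300 restored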

Contributor:

And this is also mentioned in the documentation https://github.com/ceph/cbt/blob/master/docs/Workloads.md


# Global FIO options can be overwritten for specific workload options
# would be nice to have them as a separate class -- future PR
self.time_based = bool(config.get('time_based', False))
@@ -50,6 +53,14 @@ def __init__(self, archive_dir, cluster, config):
self.rate_iops = config.get('rate_iops', None)
self.fio_out_format = config.get('fio_out_format', 'json,normal')
self.data_pool = None

self._ioddepth_per_volume: dict[int, int] = {}
total_iodepth: Optional[str] = config.get("total_iodepth", None)
if total_iodepth is not None:
self._ioddepth_per_volume = self._calculate_iodepth_per_volume(
int(self.volumes_per_client), int(total_iodepth)
)

# use_existing_volumes needs to be true to set the pool and rbd names
self.use_existing_volumes = bool(config.get('use_existing_volumes', False))
self.no_sudo = bool(config.get('no_sudo', False))
@@ -163,19 +174,51 @@ def run_workloads(self):
enable_monitor = bool(test['monitor'])
# TODO: simplify this loop to have a single iterator for general queu depth
for job in test['numjobs']:
for iod in test['iodepth']:
iodepth: list[str] = []
use_total_iodepth: bool = False
if "total_iodepth" in test.keys():
iodepth = test["total_iodepth"]
use_total_iodepth = True
else:
iodepth = test["iodepth"]
for iod in iodepth:
if use_total_iodepth:
self._ioddepth_per_volume = self._calculate_iodepth_per_volume(
int(self.volumes_per_client), int(iod)
)
self.mode = test['mode']
if 'op_size' in test:
self.op_size = test['op_size']
if 'precond' in test:
fioruntime = self.precond_time
else:
fioruntime = self.time

self.mode = test['mode']
self.numjobs = job
self.iodepth = iod
self.run_dir = ( f'{self.base_run_dir}/{self.mode}_{int(self.op_size)}/'
f'iodepth-{int(self.iodepth):03d}/numjobs-{int(self.numjobs):03d}' )

# To allow different mixed-ratio results with the same block size, we store the
# ratio within the directory name. Otherwise workloads would only support one
# mixed workload for a given block size. For 100% read or 100% write there is no
# need to store the read/write ratio.

if self.mode == 'randrw':
self.rwmixread = test['rwmixread']
self.rwmixwrite = 100 - self.rwmixread
self.run_dir = ( f'{self.base_run_dir}/{self.mode}{self.rwmixread}{self.rwmixwrite}_{int(self.op_size)}/'
f'iodepth-{int(self.iodepth):03d}/numjobs-{int(self.numjobs):03d}' )
else:
self.run_dir = ( f'{self.base_run_dir}/{self.mode}_{int(self.op_size)}/'
f'iodepth-{int(self.iodepth):03d}/numjobs-{int(self.numjobs):03d}' )
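
For illustration (values assumed, not taken from this change): with mode 'randrw', rwmixread=70, op_size=4194304, iodepth=16 and numjobs=1 the run directory becomes
    <base_run_dir>/randrw7030_4194304/iodepth-016/numjobs-001
whereas a plain 'write' run with the same settings lands in
    <base_run_dir>/write_4194304/iodepth-016/numjobs-001
so two mixed workloads with different ratios no longer overwrite each other's results.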

common.make_remote_dir(self.run_dir)

for i in range(self.volumes_per_client):
fio_cmd = self.mkfiocmd(i)
number_of_volumes: int = int(self.volumes_per_client)
if use_total_iodepth:
number_of_volumes = len(self._ioddepth_per_volume.keys())
for i in range(number_of_volumes):
fio_cmd = self.mkfiocmd(i, fioruntime)
p = common.pdsh(settings.getnodes('clients'), fio_cmd)
ps.append(p)
if enable_monitor:
@@ -226,8 +269,11 @@ def run(self):
monitoring.start(self.run_dir)
logger.info('Running rbd fio %s test.', self.mode)
ps = []
for i in range(self.volumes_per_client):
fio_cmd = self.mkfiocmd(i)
number_of_volumes: int = int(self.volumes_per_client)
if self._ioddepth_per_volume != {}:
number_of_volumes = len(self._ioddepth_per_volume.keys())
for i in range(number_of_volumes):
fio_cmd = self.mkfiocmd(i, self.time)
p = common.pdsh(settings.getnodes('clients'), fio_cmd)
ps.append(p)
for p in ps:
@@ -243,8 +289,7 @@ def run(self):
common.sync_files(f'{self.run_dir}/*', self.out_dir)
self.analyze(self.out_dir)


def mkfiocmd(self, volnum):
def mkfiocmd(self, volnum: int, time) -> str:
"""
Construct a FIO cmd (note the shell interpolation for the host
executing FIO).
@@ -257,24 +302,29 @@ def mkfiocmd(self, volnum):
logger.debug('Using rbdname %s', rbdname)
out_file = f'{self.run_dir}/output.{volnum:d}'

fio_cmd = ''
fio_cmd: str = ''
if not self.no_sudo:
fio_cmd = 'sudo '
fio_cmd += '%s --ioengine=rbd --clientname=admin --pool=%s --rbdname=%s --invalidate=0' % (self.cmd_path, self.pool_name, rbdname)
fio_cmd += ' --rw=%s' % self.mode
fio_cmd += ' --output-format=%s' % self.fio_out_format
if (self.mode == 'readwrite' or self.mode == 'randrw'):
fio_cmd += ' --rwmixread=%s --rwmixwrite=%s' % (self.rwmixread, self.rwmixwrite)
if self.time is not None:
fio_cmd += ' --runtime=%d' % self.time
if time is not None:
fio_cmd += ' --runtime=%d' % time
if self.time_based is True:
fio_cmd += ' --time_based'
if self.ramp is not None:
fio_cmd += ' --ramp_time=%d' % self.ramp
fio_cmd += ' --numjobs=%s' % self.numjobs
fio_cmd += ' --direct=1'
fio_cmd += ' --bs=%dB' % self.op_size
fio_cmd += ' --iodepth=%d' % self.iodepth

iodepth: str = f"{self.iodepth}"
if self._ioddepth_per_volume != {}:
iodepth = f"{self._ioddepth_per_volume[volnum]}"

fio_cmd += ' --iodepth=%s' % iodepth
fio_cmd += ' --end_fsync=%d' % self.end_fsync
# if self.vol_size:
# fio_cmd += ' -- size=%dM' % self.vol_size
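
As a rough illustration, the assembled command for one volume might look like the line below (the fio path, rbd name and option values are assumed; the pool name follows the default shown in tools/baseline.json, and the real command gains further options in code below this hunk):

sudo /usr/bin/fio --ioengine=rbd --clientname=admin --pool=cbt-librbdfio --rbdname=<rbdname> --invalidate=0 --rw=randwrite --output-format=json,normal --runtime=300 --time_based --ramp_time=30 --numjobs=1 --direct=1 --bs=4096B --iodepth=16 --end_fsync=0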
@@ -401,6 +451,40 @@ def analyze(self, out_dir):
logger.info('Convert results to json format.')
self.parse(out_dir)

def _calculate_iodepth_per_volume(self, number_of_volumes: int, total_desired_iodepth: int) -> dict[int, int]:
"""
Given the total desired iodepth and the number of volumes from the
configuration yaml file, calculate the iodepth for each volume.

If the iodepth specified in total_iodepth is too small to allow
an iodepth of 1 per volume, then reduce the number of volumes
used to allow an iodepth of 1 per volume.
"""
queue_depths: dict[int, int] = {}

if number_of_volumes > total_desired_iodepth:
logger.warning(
"The total iodepth requested: %s is less than 1 per volume (%s)",
total_desired_iodepth,
number_of_volumes,
)
logger.warning(
"Number of volumes per client will be reduced from %s to %s", number_of_volumes, total_desired_iodepth
)
number_of_volumes = total_desired_iodepth

iodepth_per_volume: int = total_desired_iodepth // number_of_volumes
remainder: int = total_desired_iodepth % number_of_volumes

for volume_id in range(number_of_volumes):
iodepth: int = iodepth_per_volume

if remainder > 0:
iodepth += 1
remainder -= 1
queue_depths[volume_id] = iodepth

return queue_depths
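
A worked example of the split (values assumed): with 4 volumes per client and total_iodepth=10, floor division gives 2 per volume and a remainder of 2, so the first two volumes receive one extra:

self._calculate_iodepth_per_volume(4, 10)   # -> {0: 3, 1: 3, 2: 2, 3: 2}
self._calculate_iodepth_per_volume(8, 4)    # volumes reduced from 8 to 4 -> {0: 1, 1: 1, 2: 1, 3: 1}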

def __str__(self):
return "%s\n%s\n%s" % (self.run_dir, self.out_dir, super(LibrbdFio, self).__str__())
14 changes: 13 additions & 1 deletion tests/test_bm_librbdfio.py
@@ -16,7 +16,7 @@ class TestBenchmarklibrbdfio(unittest.TestCase):
cl_name = "tools/invariant.yaml"
bl_name = "tools/baseline.json"
bl_json = {}
bl_md5 = 'e6b6fcd2be74bd08939c64a249ab2125'
bl_md5 = '84dd2f3a66eab442cc3825e0d57a9e3f'
md5_returned = None

@classmethod
@@ -40,6 +40,12 @@ def test_valid_baseline(self):
""" Verify the baseline has not been compromised """
self.assertEqual( self.bl_md5, str(self.md5_returned) )

def test_valid__ioddepth_per_volume(self):
""" Basic sanity attribute identity _ioddepth_per_volume check"""
b = benchmarkfactory.get_object(self.archive_dir,
self.cluster, 'librbdfio', self.iteration)
self.assertEqual(self.bl_json['librbdfio']['_ioddepth_per_volume'], b.__dict__['_ioddepth_per_volume'])

def test_valid_archive_dir(self):
""" Basic sanity attribute identity archive_dir check"""
b = benchmarkfactory.get_object(self.archive_dir,
@@ -208,6 +214,12 @@ def test_valid_pool_profile(self):
self.cluster, 'librbdfio', self.iteration)
self.assertEqual(self.bl_json['librbdfio']['pool_profile'], b.__dict__['pool_profile'])

def test_valid_precond_time(self):
""" Basic sanity attribute identity precond_time check"""
b = benchmarkfactory.get_object(self.archive_dir,
self.cluster, 'librbdfio', self.iteration)
self.assertEqual(self.bl_json['librbdfio']['precond_time'], b.__dict__['precond_time'])

def test_valid_prefill_vols(self):
""" Basic sanity attribute identity prefill_vols check"""
b = benchmarkfactory.get_object(self.archive_dir,
2 changes: 2 additions & 0 deletions tools/baseline.json
@@ -727,6 +727,7 @@
"vol_size": 58982.4
},
"librbdfio": {
"_ioddepth_per_volume": {},
"acceptable": {},
"archive_dir": "/tmp/results/00000000/id-83a653b5",
"base_run_dir": "/tmp/cbt.XYZ/00000000/LibrbdFio",
@@ -865,6 +866,7 @@
"pgs": 2048,
"pool_name": "cbt-librbdfio",
"pool_profile": "default",
"precond_time": null,
"prefill_vols": {
"blocksize": "4M",
"numjobs": "1"