diff --git a/.changes/next-release/enhancement-crt-82800.json b/.changes/next-release/enhancement-crt-82800.json new file mode 100644 index 000000000000..e7b2ba0db52c --- /dev/null +++ b/.changes/next-release/enhancement-crt-82800.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "awscrt", + "description": "Exposes new CRT options for S3 file IO" +} diff --git a/awscli/customizations/s3/factory.py b/awscli/customizations/s3/factory.py index 8bcf2710dd75..ca8c8937181a 100644 --- a/awscli/customizations/s3/factory.py +++ b/awscli/customizations/s3/factory.py @@ -138,6 +138,15 @@ def _create_crt_client(self, params, runtime_config): create_crt_client_kwargs['crt_credentials_provider'] = ( crt_credentials_provider ) + fio_options = {} + if (val := runtime_config.get('should_stream')) is not None: + fio_options['should_stream'] = val + if (val := runtime_config.get('disk_throughput')) is not None: + # Convert bytes to gigabits. + fio_options['disk_throughput_gbps'] = val * 8 / 1_000_000_000 + if (val := runtime_config.get('direct_io')) is not None: + fio_options['direct_io'] = val + create_crt_client_kwargs['fio_options'] = fio_options return create_s3_crt_client(**create_crt_client_kwargs) diff --git a/awscli/customizations/s3/transferconfig.py b/awscli/customizations/s3/transferconfig.py index 5227217d868c..5502ea93b0cc 100644 --- a/awscli/customizations/s3/transferconfig.py +++ b/awscli/customizations/s3/transferconfig.py @@ -15,6 +15,7 @@ # commands. 
import logging +from botocore.utils import ensure_boolean from s3transfer.manager import TransferConfig from awscli.customizations.s3 import constants @@ -31,6 +32,9 @@ 'preferred_transfer_client': constants.AUTO_RESOLVE_TRANSFER_CLIENT, 'target_bandwidth': None, 'io_chunksize': 256 * 1024, + 'should_stream': None, + 'disk_throughput': None, + 'direct_io': None, } @@ -47,9 +51,18 @@ class RuntimeConfig: 'max_bandwidth', 'target_bandwidth', 'io_chunksize', + 'disk_throughput', + ] + HUMAN_READABLE_SIZES = [ + 'multipart_chunksize', + 'multipart_threshold', + 'io_chunksize', + ] + HUMAN_READABLE_RATES = [ + 'max_bandwidth', + 'target_bandwidth', + 'disk_throughput', ] - HUMAN_READABLE_SIZES = ['multipart_chunksize', 'multipart_threshold', 'io_chunksize'] - HUMAN_READABLE_RATES = ['max_bandwidth', 'target_bandwidth'] SUPPORTED_CHOICES = { 'preferred_transfer_client': [ constants.AUTO_RESOLVE_TRANSFER_CLIENT, @@ -62,6 +75,7 @@ class RuntimeConfig: 'default': constants.CLASSIC_TRANSFER_CLIENT } } + BOOLEANS = ['should_stream', 'direct_io'] @staticmethod def defaults(): @@ -83,6 +97,7 @@ def build_config(self, **kwargs): runtime_config.update(kwargs) self._convert_human_readable_sizes(runtime_config) self._convert_human_readable_rates(runtime_config) + self._convert_booleans(runtime_config) self._resolve_choice_aliases(runtime_config) self._validate_config(runtime_config) return runtime_config @@ -116,6 +131,12 @@ def _convert_human_readable_rates(self, runtime_config): 'second (e.g. 10Mb/s or 800Kb/s)' % value ) + def _convert_booleans(self, runtime_config): + for attr in self.BOOLEANS: + value = runtime_config.get(attr) + if value is not None: + runtime_config[attr] = ensure_boolean(value) + def _human_readable_rate_to_int(self, value): # The human_readable_to_int() utility only supports integers (e.g. 1024) # as strings and human readable sizes (e.g. 10MB, 5GB). 
It does not diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index c110e29fdcbd..0d51b3d63a44 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -33,6 +33,7 @@ ) from awscrt.s3 import ( S3Client, + S3FileIoOptions, S3RequestTlsMode, S3RequestType, S3ResponseError, @@ -87,6 +88,7 @@ def create_s3_crt_client( part_size=8 * MB, use_ssl=True, verify=None, + fio_options=None, ): """ :type region: str @@ -130,6 +132,9 @@ def create_s3_crt_client( * path/to/cert/bundle.pem - A filename of the CA cert bundle to use. Specify this argument if you want to use a custom CA cert bundle instead of the default one on your system. + + :type fio_options: Optional[dict] + :param fio_options: Kwargs to use to build an `awscrt.s3.S3FileIoOptions`. """ event_loop_group = EventLoopGroup(num_threads) @@ -153,6 +158,9 @@ def create_s3_crt_client( target_gbps = _get_crt_throughput_target_gbps( provided_throughput_target_bytes=target_throughput ) + crt_fio_options = None + if fio_options: + crt_fio_options = S3FileIoOptions(**fio_options) return S3Client( bootstrap=bootstrap, region=region, @@ -162,6 +170,7 @@ def create_s3_crt_client( tls_connection_options=tls_connection_options, throughput_target_gbps=target_gbps, enable_s3express=True, + fio_options=crt_fio_options, ) diff --git a/awscli/topics/s3-config.rst b/awscli/topics/s3-config.rst index c7979d230bcc..2857691a514e 100644 --- a/awscli/topics/s3-config.rst +++ b/awscli/topics/s3-config.rst @@ -382,6 +382,50 @@ adjustments mid-transfer command in order to increase throughput and reach the requested bandwidth. +should_stream +------------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +**Default** - ``false`` + +If set to ``true``, the CRT client will skip buffering parts in-memory before +sending PUT requests. 
+ + +disk_throughput +--------------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +**Default** - ``10.0`` + +The estimated target disk throughput. This value is only applied if +``should_stream`` is set to ``true``. This value can be specified using +the same semantics as ``target_bandwidth``, that is, either as the +number of bytes per second as an integer, or using a rate suffix. + + +direct_io +--------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +.. note:: + This configuration option is only supported on Linux. + +**Default** - ``false`` + +If set to ``true``, the CRT client will enable direct IO to bypass the OS +cache when sending PUT requests. Enabling direct IO may be useful in cases +where the disk IO outperforms the kernel cache. 
+ Experimental Configuration Values ================================= diff --git a/tests/unit/customizations/s3/test_factory.py b/tests/unit/customizations/s3/test_factory.py index f153fec5de12..f18b6f06724c 100644 --- a/tests/unit/customizations/s3/test_factory.py +++ b/tests/unit/customizations/s3/test_factory.py @@ -13,7 +13,7 @@ import awscrt.s3 import pytest import s3transfer.crt -from awscrt.s3 import S3RequestTlsMode +from awscrt.s3 import S3FileIoOptions, S3RequestTlsMode from botocore.config import Config from botocore.credentials import Credentials from botocore.httpsession import DEFAULT_CA_BUNDLE @@ -483,6 +483,27 @@ def test_target_bandwidth_configure_for_crt_manager(self, mock_crt_client): self.assert_is_crt_manager(transfer_manager) self.assert_expected_throughput_target_gbps(mock_crt_client, 8) + @mock.patch('s3transfer.crt.S3Client') + def test_fio_options_configure_for_crt_manager(self, mock_crt_client): + self.runtime_config = self.get_runtime_config( + preferred_transfer_client='crt', + should_stream=True, + disk_throughput=1000**3, + direct_io=True, + ) + transfer_manager = self.factory.create_transfer_manager( + self.params, self.runtime_config + ) + expected_fio_options = S3FileIoOptions( + should_stream=True, + disk_throughput_gbps=8.0, + direct_io=True, + ) + self.assert_is_crt_manager(transfer_manager) + self.assertEqual( + mock_crt_client.call_args[1]['fio_options'], expected_fio_options + ) + @mock.patch('s3transfer.crt.get_recommended_throughput_target_gbps') @mock.patch('s3transfer.crt.S3Client') def test_target_bandwidth_uses_crt_recommended_throughput( diff --git a/tests/unit/customizations/s3/test_transferconfig.py b/tests/unit/customizations/s3/test_transferconfig.py index 77e5ab46e9e7..0deecf860997 100644 --- a/tests/unit/customizations/s3/test_transferconfig.py +++ b/tests/unit/customizations/s3/test_transferconfig.py @@ -103,6 +103,12 @@ def test_set_preferred_transfer_client(self, provided, resolved): ('target_bandwidth', 
'1000', 1000), ('target_bandwidth', '1000B/s', 1000), ('target_bandwidth', '8000b/s', 1000), + # disk_throughput cases + ('disk_throughput', '1MB/s', 1024 * 1024), + ('disk_throughput', '10Mb/s', 10 * 1024 * 1024 / 8), + ('disk_throughput', '1000', 1000), + ('disk_throughput', '1000B/s', 1000), + ('disk_throughput', '8000b/s', 1000), ], ) def test_rate_conversions(self, config_name, provided, expected): @@ -127,6 +133,13 @@ def test_rate_conversions(self, config_name, provided, expected): ('target_bandwidth', '100/s'), ('target_bandwidth', ''), ('target_bandwidth', 'value-with-no-digits'), + # disk_throughput cases + ('disk_throughput', '1MB'), + ('disk_throughput', '1B'), + ('disk_throughput', '1b'), + ('disk_throughput', '100/s'), + ('disk_throughput', ''), + ('disk_throughput', 'value-with-no-digits'), ], ) def test_invalid_rate_values(self, config_name, provided): @@ -138,6 +151,22 @@ def test_validates_preferred_transfer_client_choices(self): with pytest.raises(transferconfig.InvalidConfigError): self.build_config_with(preferred_transfer_client='not-supported') + @pytest.mark.parametrize( + 'attr,val,expected', + [ + ('should_stream', 'true', True), + ('should_stream', 'false', False), + ('should_stream', None, None), + ('direct_io', 'true', True), + ('direct_io', 'false', False), + ('direct_io', None, None), + ], + ) + def test_convert_booleans(self, attr, val, expected): + params = {attr: val} + runtime_config = self.build_config_with(**params) + assert runtime_config[attr] == expected + class TestConvertToS3TransferConfig: def test_convert(self): diff --git a/tests/unit/s3transfer/test_crt.py b/tests/unit/s3transfer/test_crt.py index a6b5da1b516a..5e863d2428c3 100644 --- a/tests/unit/s3transfer/test_crt.py +++ b/tests/unit/s3transfer/test_crt.py @@ -16,6 +16,7 @@ from botocore.credentials import Credentials, ReadOnlyCredentials from botocore.exceptions import ClientError, NoCredentialsError from botocore.session import Session +from s3transfer.constants 
import GB from s3transfer.exceptions import TransferNotDoneError from s3transfer.utils import CallArgs @@ -365,3 +366,55 @@ def test_target_throughput( def test_always_enables_s3express(self, mock_s3_crt_client): s3transfer.crt.create_s3_crt_client('us-west-2') assert mock_s3_crt_client.call_args[1]['enable_s3express'] is True + + @pytest.mark.parametrize( + 'fio_options,should_stream,disk_throughput,direct_io', + [ + ({'should_stream': True}, True, 0.0, False), + ({'disk_throughput_gbps': 8}, False, 8, False), + ({'direct_io': True}, False, 0.0, True), + ( + {'should_stream': True, 'disk_throughput_gbps': 8}, + True, + 8, + False, + ), + ({'should_stream': True, 'direct_io': True}, True, 0.0, True), + ({'disk_throughput_gbps': 8, 'direct_io': True}, False, 8, True), + ( + { + 'should_stream': True, + 'disk_throughput_gbps': 8, + 'direct_io': True, + }, + True, + 8, + True, + ), + ], + ) + def test_fio_options( + self, + fio_options, + should_stream, + disk_throughput, + direct_io, + mock_s3_crt_client, + ): + params = {'fio_options': fio_options} + s3transfer.crt.create_s3_crt_client( + 'us-west-2', + **params, + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].should_stream + is should_stream + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].disk_throughput_gbps + == disk_throughput + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].direct_io + is direct_io + )