diff --git a/api/management-model/src/test/java/org/apache/polaris/core/admin/model/CatalogSerializationTest.java b/api/management-model/src/test/java/org/apache/polaris/core/admin/model/CatalogSerializationTest.java index c4210486ba..04cd2cbbb0 100644 --- a/api/management-model/src/test/java/org/apache/polaris/core/admin/model/CatalogSerializationTest.java +++ b/api/management-model/src/test/java/org/apache/polaris/core/admin/model/CatalogSerializationTest.java @@ -71,6 +71,7 @@ public void testJsonFormat() throws JsonProcessingException { + "\"storageConfigInfo\":{" + "\"roleArn\":\"arn:aws:iam::123456789012:role/test-role\"," + "\"pathStyleAccess\":false," + + "\"ignoreSSLVerification\":false," + "\"storageType\":\"S3\"," + "\"allowedLocations\":[]" + "}}"); diff --git a/getting-started/minio-https/README.md b/getting-started/minio-https/README.md new file mode 100644 index 0000000000..3df789c6f8 --- /dev/null +++ b/getting-started/minio-https/README.md @@ -0,0 +1,94 @@ + + +# Getting Started with Apache Polaris and MinIO (HTTPS) + +## Overview + +This example uses MinIO (HTTPS) as a storage provider with Polaris, it can be used for other S3-Compatible Storage API with Ignoring SSL Verification for Development/Test Purposes + +Spark is used as a query engine. This example assumes a local Spark installation. +See the [Spark Notebooks Example](../spark/README.md) for a more advanced Spark setup. + +## Starting the Example + +1. Build the Polaris server image if it's not already present locally: + + ```shell + ./gradlew \ + :polaris-server:assemble \ + :polaris-server:quarkusAppPartsBuild --rerun \ + -Dquarkus.container-image.build=true + ``` + +2. Start the docker compose group by running the following command from the root of the repository: + + ```shell + docker compose -f getting-started/minio/docker-compose.yml up + ``` + +## Connecting From Spark + +```shell +bin/spark-sql \ + --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0 \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.polaris.type=rest \ + --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ + --conf spark.sql.catalog.polaris.token-refresh-enabled=false \ + --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ + --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ + --conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \ + --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ + --conf spark.sql.catalog.polaris.client.region=irrelevant +``` + +Note: `s3cr3t` is defined as the password for the `root` users in the `docker-compose.yml` file. + +Note: The `client.region` configuration is required for the AWS S3 client to work, but it is not used in this example +since MinIO does not require a specific region. + +## Running Queries + +Run inside the Spark SQL shell: + +``` +spark-sql (default)> use polaris; +Time taken: 0.837 seconds + +spark-sql ()> create namespace ns; +Time taken: 0.374 seconds + +spark-sql ()> create table ns.t1 as select 'abc'; +Time taken: 2.192 seconds + +spark-sql ()> select * from ns.t1; +abc +Time taken: 0.579 seconds, Fetched 1 row(s) +``` + +## MinIO Endpoints + +Note that the catalog configuration defined in the `docker-compose.yml` contains +different endpoints for the Polaris Server and the client (Spark). Specifically, +the client endpoint is `https://localhost:9000`, but `endpointInternal` is `https://minio:9000`. + +This is necessary because clients running on `localhost` do not normally see service +names (such as `minio`) that are internal to the docker compose environment. diff --git a/getting-started/minio-https/docker-compose.yml b/getting-started/minio-https/docker-compose.yml new file mode 100644 index 0000000000..f65bce149e --- /dev/null +++ b/getting-started/minio-https/docker-compose.yml @@ -0,0 +1,150 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + + minio: + image: quay.io/minio/minio:latest + ports: + # API port + - "9000:9000" + # UI port + - "9001:9001" + environment: + MINIO_ROOT_USER: minio_root + MINIO_ROOT_PASSWORD: m1n1opwd + command: + - "server" + - "/data" + - "--console-address" + - ":9001" + volumes: + # Mount the generated certs volume into MinIO + - minio_certs:/root/.minio/certs + depends_on: + certs-init: + condition: service_completed_successfully + healthcheck: + # Use HTTPS healthcheck now that MinIO will serve TLS on 9000 + test: ["CMD", "curl", "-k", "https://127.0.0.1:9000/minio/health/live"] + interval: 1s + timeout: 10s + + polaris: + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Optional, allows attaching a debugger to the Polaris JVM + - "5005:5005" + depends_on: + minio: + condition: service_healthy + setup_bucket: + condition: service_completed_successfully + environment: + JAVA_DEBUG: true + JAVA_DEBUG_PORT: "*:5005" + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: minio_root + AWS_SECRET_ACCESS_KEY: m1n1opwd + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" + quarkus.log.category."org.apache.polaris".level: DEBUG + quarkus.log.category."org.apache.polaris.catalog".level: DEBUG + quarkus.log.category."org.apache.polaris.table".level: DEBUG + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + start_period: 10s + + setup_bucket: + image: quay.io/minio/mc:latest + depends_on: + minio: + condition: service_healthy + entrypoint: "/bin/sh" + command: + - "-c" + - >- + echo Creating MinIO bucket...; + mc alias set pol https://minio:9000 minio_root m1n1opwd --insecure; + mc mb pol/bucket123 --insecure; + mc ls pol --insecure; + echo Bucket setup complete.; + + # TLS certs are created at `docker compose up` by the certs-init one-shot service + certs-init: + image: alpine:3.18 + entrypoint: "/bin/sh" + command: + - "-c" + - >- + apk add --no-cache openssl; + printf '%s\n' "[req]" "ndistinguished_name = req_distinguished_name" "req_extensions = v3_req" "prompt = no" "" \ + "[req_distinguished_name]" "CN = localhost" "" "[v3_req]" "subjectAltName = @alt_names" "" "[alt_names]" "DNS.1 = localhost" "DNS.2 = minio" "IP.1 = 127.0.0.1" \ + > /openssl.cnf; + openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout /certs/private.key -out /certs/public.crt \ + -subj "/C=US/ST=State/L=City/O=Org/CN=localhost" -extensions v3_req -config /openssl.cnf; + chmod 600 /certs/private.key || true; chmod 644 /certs/public.crt || true; + echo Generated certs.; + volumes: + - minio_certs:/certs + restart: "no" + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + environment: + CLIENT_ID: root + CLIENT_SECRET: s3cr3t + # Use HTTPS endpoints and indicate to Polaris to ignore SSL verification for this self-signed setup + STORAGE_CONFIG_INFO: '{"storageType":"S3","endpoint":"https://localhost:9000","endpointInternal":"https://minio:9000","pathStyleAccess":true,"ignoreSSLVerification":true, "stsUnavailable":true}' + STORAGE_LOCATION: 's3://bucket123' + volumes: + - ../assets/polaris/:/polaris + entrypoint: "/bin/sh" + command: + - "-c" + - >- + chmod +x /polaris/create-catalog.sh; + chmod +x /polaris/obtain-token.sh; + source /polaris/obtain-token.sh; + echo Creating catalog...; + /polaris/create-catalog.sh POLARIS $$TOKEN; + echo Extra grants...; + curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ + -X PUT \ + http://polaris:8181/api/management/v1/catalogs/quickstart_catalog/catalog-roles/catalog_admin/grants \ + -d '{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}'; + echo Done.; + +volumes: + minio_certs: + driver: local + driver_opts: + type: tmpfs + device: tmpfs + o: "size=1m,mode=1777" diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index 055ccd8959..a6cc3b7dbf 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -166,6 +166,7 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .setEndpoint(awsConfig.getEndpoint()) .setStsEndpoint(awsConfig.getStsEndpoint()) + .setIgnoreSSLVerification(awsConfig.getIgnoreSSLVerification()) .setPathStyleAccess(awsConfig.getPathStyleAccess()) .setStsUnavailable(awsConfig.getStsUnavailable()) .setEndpointInternal(awsConfig.getEndpointInternal()) @@ -315,6 +316,7 @@ public Builder setStorageConfigurationInfo( .pathStyleAccess(awsConfigModel.getPathStyleAccess()) .stsUnavailable(awsConfigModel.getStsUnavailable()) .endpointInternal(awsConfigModel.getEndpointInternal()) + .ignoreSSLVerification(awsConfigModel.getIgnoreSSLVerification()) .build(); config = awsConfig; break; diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsCredentialsStorageIntegration.java index 8023f7a607..451ce1b922 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsCredentialsStorageIntegration.java @@ -102,7 +102,11 @@ public AccessConfig getSubscopedCreds( @SuppressWarnings("resource") // Note: stsClientProvider returns "thin" clients that do not need closing StsClient stsClient = - stsClientProvider.stsClient(StsDestination.of(storageConfig.getStsEndpointUri(), region)); + stsClientProvider.stsClient( + StsDestination.of( + storageConfig.getStsEndpointUri(), + region, + storageConfig.getIgnoreSSLVerification())); AssumeRoleResponse response = stsClient.assumeRole(request.build()); accessConfig.put(StorageAccessProperty.AWS_KEY_ID, response.credentials().accessKeyId()); @@ -139,6 +143,12 @@ public AccessConfig getSubscopedCreds( StorageAccessProperty.AWS_ENDPOINT.getPropertyName(), internalEndpointUri.toString()); } + // Propagate ignore SSL verification flag so callers (e.g., FileIOFactory) can honor it when + // constructing S3 clients for metadata ops. + if (Boolean.TRUE.equals(storageConfig.getIgnoreSSLVerification())) { + accessConfig.putInternalProperty("polaris.ignore-ssl-verification", "true"); + } + if (Boolean.TRUE.equals(storageConfig.getPathStyleAccess())) { accessConfig.put(StorageAccessProperty.AWS_PATH_STYLE_ACCESS, Boolean.TRUE.toString()); } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java index b3d7d60790..982ab7f828 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java @@ -108,6 +108,9 @@ public URI getInternalEndpointUri() { @Nullable public abstract String getStsEndpoint(); + /** Flag indicating whether SSL certificate verification should be disabled */ + public abstract @Nullable Boolean getIgnoreSSLVerification(); + /** Returns the STS endpoint if set, defaulting to {@link #getEndpointUri()} otherwise. */ @JsonIgnore @Nullable diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/StsClientProvider.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/StsClientProvider.java index b5a1fefe14..3329659be4 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/StsClientProvider.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/StsClientProvider.java @@ -52,8 +52,16 @@ interface StsDestination { @Value.Parameter(order = 2) Optional region(); - static StsDestination of(@Nullable URI endpoint, @Nullable String region) { - return ImmutableStsDestination.of(Optional.ofNullable(endpoint), Optional.ofNullable(region)); + /** Whether to ignore SSL certificate verification */ + @Value.Parameter(order = 3) + Optional ignoreSSLVerification(); + + static StsDestination of( + @Nullable URI endpoint, @Nullable String region, @Nullable Boolean ignoreSSLVerification) { + return ImmutableStsDestination.of( + Optional.ofNullable(endpoint), + Optional.ofNullable(region), + Optional.ofNullable(ignoreSSLVerification)); } } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/DefaultFileIOFactory.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/DefaultFileIOFactory.java index 44f038d72f..5ec52c4be6 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/DefaultFileIOFactory.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/DefaultFileIOFactory.java @@ -39,6 +39,12 @@ import org.apache.polaris.core.storage.PolarisCredentialVendor; import org.apache.polaris.core.storage.PolarisStorageActions; import org.apache.polaris.core.storage.cache.StorageCredentialCache; +import org.apache.polaris.service.catalog.io.s3.ReflectionS3ClientInjector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.SdkHttpClient; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; /** * A default FileIO factory implementation for creating Iceberg {@link FileIO} instances with @@ -52,6 +58,8 @@ @Identifier("default") public class DefaultFileIOFactory implements FileIOFactory { + private static final Logger LOGGER = LoggerFactory.getLogger(DefaultFileIOFactory.class); + private final StorageCredentialCache storageCredentialCache; private final MetaStoreManagerFactory metaStoreManagerFactory; @@ -63,6 +71,14 @@ public DefaultFileIOFactory( this.metaStoreManagerFactory = metaStoreManagerFactory; } + @Inject + @Identifier("aws-sdk-http-client") + SdkHttpClient sdkHttpClient; + + @Inject + @Identifier("aws-sdk-http-client-insecure") + SdkHttpClient insecureSdkHttpClient; + @Override public FileIO loadFileIO( @Nonnull CallContext callContext, @@ -109,6 +125,48 @@ public FileIO loadFileIO( @VisibleForTesting FileIO loadFileIOInternal( @Nonnull String ioImplClassName, @Nonnull Map properties) { - return new ExceptionMappingFileIO(CatalogUtil.loadFileIO(ioImplClassName, properties, null)); + FileIO fileIO = CatalogUtil.loadFileIO(ioImplClassName, properties, null); + + // If this is Iceberg's S3FileIO and the storage config requested ignoring SSL verification, + // try to construct and inject an S3 client that uses our insecure SDK HTTP client. This is + // a best-effort reflective integration to make the 'ignoreSSLVerification' flag effective + // for the S3 client used by Iceberg. + try { + boolean ignoreSsl = "true".equals(properties.get("polaris.ignore-ssl-verification")); + if (ignoreSsl && "org.apache.iceberg.aws.s3.S3FileIO".equals(ioImplClassName)) { + try { + // Build prebuilt S3 clients using the insecure SDK HTTP client + S3Client prebuilt = + ReflectionS3ClientInjector.buildS3Client(insecureSdkHttpClient, properties); + S3AsyncClient prebuiltAsync = + ReflectionS3ClientInjector.buildS3AsyncClient(insecureSdkHttpClient, properties); + boolean supplierInjected = + ReflectionS3ClientInjector.injectSupplierIntoS3FileIO( + fileIO, prebuilt, prebuiltAsync); + if (supplierInjected) { + LOGGER.info( + "Injected SerializableSupplier for insecure S3 client into Iceberg S3FileIO for ioImpl={}", + ioImplClassName); + } else { + LOGGER.warn( + "Requested ignore-ssl-verification but failed to inject insecure S3Client supplier into {}.\n" + + "Consider importing the storage certificate into Polaris JVM truststore or using HTTP endpoints for local testing.", + ioImplClassName); + } + } catch (Throwable t) { + LOGGER.warn( + "Failed to build or inject prebuilt S3 client for {}: {}", + ioImplClassName, + t.getMessage()); + } + } + } catch (Exception e) { + LOGGER.warn( + "Exception while attempting to inject S3 client for {}: {}", + ioImplClassName, + e.toString()); + } + + return new ExceptionMappingFileIO(fileIO); } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjector.java b/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjector.java new file mode 100644 index 0000000000..75708f67e8 --- /dev/null +++ b/runtime/service/src/main/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjector.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.service.catalog.io.s3; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.net.URI; +import java.util.Map; +import org.apache.polaris.core.storage.StorageAccessProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.http.SdkHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3ClientBuilder; +import software.amazon.awssdk.services.s3.S3Configuration; + +/** + * Best-effort reflective injector that attempts to wire a pre-built S3Client into Iceberg's + * S3FileIO instance. This avoids needing to modify Iceberg. + */ +public final class ReflectionS3ClientInjector { + private static final Logger LOGGER = LoggerFactory.getLogger(ReflectionS3ClientInjector.class); + + private ReflectionS3ClientInjector() {} + + /** + * For Iceberg's S3FileIO implementation, PrefixedS3Client holds SerializableSupplier + * fields named 's3' and 's3Async' which are used to lazily construct the real clients. To ensure + * Iceberg will use our prebuilt S3Client (configured with the provided SdkHttpClient), replace + * those supplier fields with suppliers that return the provided instances. + */ + public static boolean injectSupplierIntoS3FileIO( + Object s3FileIOInstance, S3Client prebuiltS3Client, S3AsyncClient prebuiltS3AsyncClient) { + if (s3FileIOInstance == null) { + return false; + } + + Class clazz = s3FileIOInstance.getClass(); + boolean changed = false; + while (clazz != null) { + try { + Field s3Field = null; + Field s3AsyncField = null; + try { + s3Field = clazz.getDeclaredField("s3"); + } catch (NoSuchFieldException ignored) { + } + try { + s3AsyncField = clazz.getDeclaredField("s3Async"); + } catch (NoSuchFieldException ignored) { + } + + if (s3Field != null) { + s3Field.setAccessible(true); + // Build a SerializableSupplier that returns our prebuilt S3Client. + // Use a simple SerializableSupplier implementation used by Iceberg: + // org.apache.iceberg.util.SerializableSupplier + try { + Class serializableSupplierClazz = + Class.forName("org.apache.iceberg.util.SerializableSupplier"); + Object serializableSupplier = + java.lang.reflect.Proxy.newProxyInstance( + serializableSupplierClazz.getClassLoader(), + new Class[] {serializableSupplierClazz, java.io.Serializable.class}, + (proxy, method, args) -> { + if ("get".equals(method.getName())) { + return prebuiltS3Client; + } + // default proxy behavior + return method.invoke(proxy, args); + }); + + s3Field.set(s3FileIOInstance, serializableSupplier); + changed = true; + } catch (ClassNotFoundException cnfe) { + // Fallback: try to set any field assignable from java.util.function.Supplier + Object simple = (java.util.function.Supplier) () -> prebuiltS3Client; + s3Field.set(s3FileIOInstance, simple); + changed = true; + } + } + + if (s3AsyncField != null && prebuiltS3AsyncClient != null) { + s3AsyncField.setAccessible(true); + try { + Class serializableSupplierClazz = + Class.forName("org.apache.iceberg.util.SerializableSupplier"); + Object serializableSupplierAsync = + java.lang.reflect.Proxy.newProxyInstance( + serializableSupplierClazz.getClassLoader(), + new Class[] {serializableSupplierClazz, java.io.Serializable.class}, + (proxy, method, args) -> { + if ("get".equals(method.getName())) { + return prebuiltS3AsyncClient; + } + return method.invoke(proxy, args); + }); + + s3AsyncField.set(s3FileIOInstance, serializableSupplierAsync); + changed = true; + } catch (ClassNotFoundException cnfe) { + Object simpleAsync = + (java.util.function.Supplier) () -> prebuiltS3AsyncClient; + s3AsyncField.set(s3FileIOInstance, simpleAsync); + changed = true; + } + } + } catch (Throwable t) { + LOGGER.debug("Failed to set supplier fields on {}: {}", clazz, t.getMessage()); + } + + clazz = clazz.getSuperclass(); + } + + return changed; + } + + public static S3Client buildS3Client(SdkHttpClient httpClient, Map properties) { + S3ClientBuilder builder = S3Client.builder(); + if (httpClient != null) { + builder.httpClient(httpClient); + } + AwsCredentialsProvider creds = credentialsProviderFrom(properties); + if (creds != null) builder.credentialsProvider(creds); + + applyRegionAndEndpoint(builder, properties); + builder.serviceConfiguration(s3ConfigurationFrom(properties)); + + return builder.build(); + } + + public static S3AsyncClient buildS3AsyncClient( + SdkHttpClient httpClient, Map properties) { + try { + // Attempt to build an async client. If the provided httpClient is an instance that also + // implements the async HTTP client interface, use it. Otherwise fall back to the + // default async client builder. + software.amazon.awssdk.http.async.SdkAsyncHttpClient asyncHttpClient = null; + if (httpClient instanceof software.amazon.awssdk.http.async.SdkAsyncHttpClient async) { + asyncHttpClient = async; + } + + software.amazon.awssdk.services.s3.S3AsyncClientBuilder asyncBuilder = + software.amazon.awssdk.services.s3.S3AsyncClient.builder(); + + if (asyncHttpClient != null) { + asyncBuilder.httpClient(asyncHttpClient); + } + + AwsCredentialsProvider creds = credentialsProviderFrom(properties); + if (creds != null) asyncBuilder.credentialsProvider(creds); + + applyRegionAndEndpoint(asyncBuilder, properties); + asyncBuilder.serviceConfiguration(s3ConfigurationFrom(properties)); + + return asyncBuilder.build(); + } catch (Exception e) { + LOGGER.debug("Failed to build S3AsyncClient: {}", e.toString()); + return null; + } + } + + private static AwsCredentialsProvider credentialsProviderFrom(Map properties) { + String accessKey = properties.get(StorageAccessProperty.AWS_KEY_ID.getPropertyName()); + String secretKey = properties.get(StorageAccessProperty.AWS_SECRET_KEY.getPropertyName()); + if (accessKey != null && secretKey != null) { + AwsBasicCredentials creds = AwsBasicCredentials.create(accessKey, secretKey); + return StaticCredentialsProvider.create(creds); + } + return null; + } + + private static void applyRegionAndEndpoint(Object builder, Map properties) { + String region = properties.get(StorageAccessProperty.CLIENT_REGION.getPropertyName()); + if (region != null) { + try { + Method m = builder.getClass().getMethod("region", Region.class); + m.invoke(builder, Region.of(region)); + } catch (Exception ignored) { + LOGGER.debug("Unable to apply region to builder {}", builder.getClass().getName()); + } + } + + String endpoint = properties.get(StorageAccessProperty.AWS_ENDPOINT.getPropertyName()); + if (endpoint != null) { + try { + Method m = builder.getClass().getMethod("endpointOverride", URI.class); + m.invoke(builder, URI.create(endpoint)); + } catch (Exception ignored) { + LOGGER.debug( + "Unable to apply endpointOverride to builder {}", builder.getClass().getName()); + } + } + } + + private static S3Configuration s3ConfigurationFrom(Map properties) { + String pathStyle = + properties.get(StorageAccessProperty.AWS_PATH_STYLE_ACCESS.getPropertyName()); + boolean forcePathStyle = "true".equals(pathStyle); + return S3Configuration.builder().pathStyleAccessEnabled(forcePathStyle).build(); + } +} diff --git a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java index 6e25dbfc18..70344225a4 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/config/ServiceProducers.java @@ -34,6 +34,10 @@ import jakarta.ws.rs.core.Context; import java.time.Clock; import java.util.stream.Collectors; +import javax.net.ssl.SSLContext; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.ssl.SSLContextBuilder; import org.apache.polaris.core.PolarisCallContext; import org.apache.polaris.core.PolarisDefaultDiagServiceImpl; import org.apache.polaris.core.PolarisDiagnostics; @@ -230,6 +234,50 @@ public SdkHttpClient sdkHttpClient(S3AccessConfig config) { return httpClient.build(); } + /** + * Producer that creates an insecure SDK HTTP client (trusts all certs). This allows other + * components to explicitly request the insecure client instance when wiring clients that need to + * ignore TLS verification (for development/test setups only). + */ + @Produces + @Singleton + @Identifier("aws-sdk-http-client-insecure") + public SdkHttpClient insecureSdkHttpClient(S3AccessConfig config) { + return createInsecureHttpClient(config); + } + + /** + * Creates an HTTP client that bypasses SSL certificate verification. WARNING: This should only be + * used for development and testing environments. + */ + public SdkHttpClient createInsecureHttpClient(S3AccessConfig config) { + try { + SSLContext sslContext = + SSLContextBuilder.create().loadTrustMaterial(null, (chain, authType) -> true).build(); + + ApacheHttpClient.Builder httpClient = + ApacheHttpClient.builder() + .socketFactory( + new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE)); + + // Apply configuration options + config.maxHttpConnections().ifPresent(httpClient::maxConnections); + config.readTimeout().ifPresent(httpClient::socketTimeout); + config.connectTimeout().ifPresent(httpClient::connectionTimeout); + config.connectionAcquisitionTimeout().ifPresent(httpClient::connectionAcquisitionTimeout); + config.connectionMaxIdleTime().ifPresent(httpClient::connectionMaxIdleTime); + config.connectionTimeToLive().ifPresent(httpClient::connectionTimeToLive); + config.expectContinueEnabled().ifPresent(httpClient::expectContinueEnabled); + + LOGGER.warn( + "Creating HTTP client with SSL certificate verification disabled. Use only in development!"); + return httpClient.build(); + } catch (Exception e) { + LOGGER.error("Failed to create insecure HTTP client, using secure client instead", e); + return sdkHttpClient(config); + } + } + public void closeSdkHttpClient( @Disposes @Identifier("aws-sdk-http-client") SdkHttpClient client) { client.close(); @@ -241,7 +289,10 @@ public StsClientsPool stsClientsPool( @Identifier("aws-sdk-http-client") SdkHttpClient httpClient, StorageConfiguration config, MeterRegistry meterRegistry) { - return new StsClientsPool(config.effectiveClientsCacheMaxSize(), httpClient, meterRegistry); + // Create insecure HTTP client for SSL bypass scenarios + SdkHttpClient insecureHttpClient = createInsecureHttpClient(config); + return new StsClientsPool( + config.effectiveClientsCacheMaxSize(), httpClient, insecureHttpClient, meterRegistry); } /** diff --git a/runtime/service/src/main/java/org/apache/polaris/service/credentials/connection/SigV4ConnectionCredentialVendor.java b/runtime/service/src/main/java/org/apache/polaris/service/credentials/connection/SigV4ConnectionCredentialVendor.java index d85d318cfe..3caf634b94 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/credentials/connection/SigV4ConnectionCredentialVendor.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/credentials/connection/SigV4ConnectionCredentialVendor.java @@ -144,7 +144,8 @@ StsClient getStsClient(@Nonnull SigV4AuthenticationParametersDpo sigv4Params) { // Get STS client from the provider (potentially pooled) // The Polaris service identity credentials are set on the AssumeRole request via // overrideConfiguration, not on the STS client itself - // TODO: Configure proper StsDestination with region/endpoint from sigv4Params - return stsClientProvider.stsClient(StsClientProvider.StsDestination.of(null, null)); + // TODO: Configure proper StsDestination with region/endpoint/ignoreSSLVerification from + // sigv4Params + return stsClientProvider.stsClient(StsClientProvider.StsDestination.of(null, null, null)); } } diff --git a/runtime/service/src/main/java/org/apache/polaris/service/storage/aws/StsClientsPool.java b/runtime/service/src/main/java/org/apache/polaris/service/storage/aws/StsClientsPool.java index 170274d40a..07f9a6c234 100644 --- a/runtime/service/src/main/java/org/apache/polaris/service/storage/aws/StsClientsPool.java +++ b/runtime/service/src/main/java/org/apache/polaris/service/storage/aws/StsClientsPool.java @@ -45,8 +45,8 @@ public class StsClientsPool implements StsClientProvider { private static final String CACHE_NAME = "sts-clients"; - private final Cache clients; - private final Function clientBuilder; + private final Cache clients; + private final Function clientBuilder; public StsClientsPool( int clientsCacheMaxSize, SdkHttpClient sdkHttpClient, MeterRegistry meterRegistry) { @@ -56,10 +56,21 @@ public StsClientsPool( Optional.ofNullable(meterRegistry)); } + public StsClientsPool( + int clientsCacheMaxSize, + SdkHttpClient sdkHttpClient, + SdkHttpClient insecureHttpClient, + MeterRegistry meterRegistry) { + this( + clientsCacheMaxSize, + key -> createStsClient(key, sdkHttpClient, insecureHttpClient), + Optional.of(meterRegistry)); + } + @VisibleForTesting StsClientsPool( int maxSize, - Function clientBuilder, + Function clientBuilder, Optional meterRegistry) { this.clientBuilder = clientBuilder; this.clients = @@ -70,11 +81,12 @@ public StsClientsPool( } @Override - public StsClient stsClient(StsDestination destination) { + public StsClient stsClient(StsClientProvider.StsDestination destination) { return clients.get(destination, clientBuilder); } - private static StsClient defaultStsClient(StsDestination parameters, SdkHttpClient sdkClient) { + private static StsClient defaultStsClient( + StsClientProvider.StsDestination parameters, SdkHttpClient sdkClient) { StsClientBuilder builder = StsClient.builder(); builder.httpClient(sdkClient); if (parameters.endpoint().isPresent()) { @@ -98,4 +110,24 @@ static StatsCounter statsCounter(Optional meterRegistry, int maxS } return StatsCounter.disabledStatsCounter(); } + + private static StsClient createStsClient( + StsClientProvider.StsDestination parameters, + SdkHttpClient secureClient, + SdkHttpClient insecureClient) { + StsClientBuilder builder = StsClient.builder(); + + boolean ignoreSSL = parameters.ignoreSSLVerification().orElse(false); + builder.httpClient(ignoreSSL ? insecureClient : secureClient); + + if (parameters.endpoint().isPresent()) { + CompletableFuture endpointFuture = + completedFuture(Endpoint.builder().url(parameters.endpoint().get()).build()); + builder.endpointProvider(params -> endpointFuture); + } + + parameters.region().ifPresent(r -> builder.region(Region.of(r))); + + return builder.build(); + } } diff --git a/runtime/service/src/test/java/org/apache/polaris/service/admin/ManagementServiceTest.java b/runtime/service/src/test/java/org/apache/polaris/service/admin/ManagementServiceTest.java index e13d6fe080..af6bd08791 100644 --- a/runtime/service/src/test/java/org/apache/polaris/service/admin/ManagementServiceTest.java +++ b/runtime/service/src/test/java/org/apache/polaris/service/admin/ManagementServiceTest.java @@ -529,4 +529,59 @@ public void testCreateCatalogReturnErrorOnFailure() { resultWithError.getReturnStatus(), resultWithError.getExtraInformation())); } + + @Test + public void testCreateCatalogWithIgnoreSSLVerification() { + // Create a new test service that allows setting S3 endpoints + TestServices sslTestServices = + TestServices.builder() + .config(Map.of("SUPPORTED_CATALOG_STORAGE_TYPES", List.of("S3", "GCS", "AZURE"))) + .config(Map.of("ALLOW_SETTING_S3_ENDPOINTS", Boolean.TRUE)) + .build(); + + AwsStorageConfigInfo awsConfigModel = + AwsStorageConfigInfo.builder() + .setRoleArn("arn:aws:iam::123456789012:role/my-role") + .setExternalId("externalId") + .setUserArn("userArn") + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3) + .setAllowedLocations(List.of("s3://my-bucket/path/to/data")) + .setStsEndpoint("https://sts.example.com:4443") + .setIgnoreSSLVerification(true) + .build(); + + String catalogName = "ssl-ignore-catalog"; + Catalog catalog = + PolarisCatalog.builder() + .setType(Catalog.TypeEnum.INTERNAL) + .setName(catalogName) + .setProperties(new CatalogProperties("s3://my-bucket/path/to/data")) + .setStorageConfigInfo(awsConfigModel) + .build(); + + // Verify catalog creation succeeds with ignoreSSLVerification flag + try (Response response = + sslTestServices + .catalogsApi() + .createCatalog( + new CreateCatalogRequest(catalog), + sslTestServices.realmContext(), + sslTestServices.securityContext())) { + assertThat(response).returns(Response.Status.CREATED.getStatusCode(), Response::getStatus); + } + + // Verify the flag is persisted and returned in GET + try (Response response = + sslTestServices + .catalogsApi() + .getCatalog( + catalogName, sslTestServices.realmContext(), sslTestServices.securityContext())) { + assertThat(response).returns(Response.Status.OK.getStatusCode(), Response::getStatus); + Catalog fetchedCatalog = (Catalog) response.getEntity(); + AwsStorageConfigInfo storageConfig = + (AwsStorageConfigInfo) fetchedCatalog.getStorageConfigInfo(); + assertThat(storageConfig.getIgnoreSSLVerification()).isTrue(); + assertThat(storageConfig.getStsEndpoint()).isEqualTo("https://sts.example.com:4443"); + } + } } diff --git a/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorConfigTest.java b/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorConfigTest.java new file mode 100644 index 0000000000..94dfe07ec9 --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorConfigTest.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.service.catalog.io.s3; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.lang.reflect.Method; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import org.apache.polaris.core.storage.StorageAccessProperty; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Configuration; + +/** Tests that S3 client builders apply region/endpoint/path-style/credentials as expected. */ +@SuppressWarnings("unused") +public class ReflectionS3ClientInjectorConfigTest { + + private Map makeProps(String region, String endpoint, boolean pathStyle) { + Map p = new HashMap<>(); + p.put(StorageAccessProperty.CLIENT_REGION.getPropertyName(), region); + p.put(StorageAccessProperty.AWS_ENDPOINT.getPropertyName(), endpoint); + p.put( + StorageAccessProperty.AWS_PATH_STYLE_ACCESS.getPropertyName(), Boolean.toString(pathStyle)); + p.put(StorageAccessProperty.AWS_KEY_ID.getPropertyName(), "AKIA_TEST"); + p.put(StorageAccessProperty.AWS_SECRET_KEY.getPropertyName(), "SECRET_TEST"); + return p; + } + + // helper removed: deep traversal not used after switching to reflective checks + + @Test + public void testBuildS3Client_appliesConfiguration() throws Exception { + Map props = makeProps("us-west-2", "https://custom.example:9000", true); + + // Verify credentials helper + Method credsMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod("credentialsProviderFrom", Map.class); + credsMethod.setAccessible(true); + Object credsProv = credsMethod.invoke(null, props); + assertTrue(credsProv instanceof AwsCredentialsProvider); + AwsCredentialsProvider prov = (AwsCredentialsProvider) credsProv; + var creds = prov.resolveCredentials(); + assertTrue("AKIA_TEST".equals(creds.accessKeyId())); + assertTrue("SECRET_TEST".equals(creds.secretAccessKey())); + + // Verify S3Configuration helper + Method s3ConfMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod("s3ConfigurationFrom", Map.class); + s3ConfMethod.setAccessible(true); + Object s3Conf = s3ConfMethod.invoke(null, props); + assertTrue(s3Conf instanceof S3Configuration); + assertTrue(((S3Configuration) s3Conf).pathStyleAccessEnabled()); + + // Verify applyRegionAndEndpoint applies to arbitrary builder-like objects + @SuppressWarnings("unused") + class TestBuilder { + public Region regionVal; + public URI endpointVal; + + public TestBuilder region(Region r) { + this.regionVal = r; + return this; + } + + public TestBuilder endpointOverride(URI u) { + this.endpointVal = u; + return this; + } + } + + TestBuilder tb = new TestBuilder(); + Method applyMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod( + "applyRegionAndEndpoint", Object.class, Map.class); + applyMethod.setAccessible(true); + applyMethod.invoke(null, tb, props); + assertEquals("us-west-2", tb.regionVal.id()); + assertEquals(new URI("https://custom.example:9000"), tb.endpointVal); + } + + @Test + public void testBuildS3AsyncClient_appliesConfiguration() throws Exception { + Map props = makeProps("eu-central-1", "https://async.example:9000", false); + + // credentials + Method credsMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod("credentialsProviderFrom", Map.class); + credsMethod.setAccessible(true); + Object credsProv = credsMethod.invoke(null, props); + assertTrue(credsProv instanceof AwsCredentialsProvider); + AwsCredentialsProvider prov = (AwsCredentialsProvider) credsProv; + var creds = prov.resolveCredentials(); + assertEquals("AKIA_TEST", creds.accessKeyId()); + + // s3 config + Method s3ConfMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod("s3ConfigurationFrom", Map.class); + s3ConfMethod.setAccessible(true); + Object s3Conf = s3ConfMethod.invoke(null, props); + assertTrue(s3Conf instanceof S3Configuration); + assertTrue(!((S3Configuration) s3Conf).pathStyleAccessEnabled()); + + // applyRegionAndEndpoint on TestBuilder + @SuppressWarnings("unused") + class TestBuilder { + public Region regionVal; + public URI endpointVal; + + public TestBuilder region(Region r) { + this.regionVal = r; + return this; + } + + public TestBuilder endpointOverride(URI u) { + this.endpointVal = u; + return this; + } + } + + TestBuilder tb = new TestBuilder(); + Method applyMethod = + ReflectionS3ClientInjector.class.getDeclaredMethod( + "applyRegionAndEndpoint", Object.class, Map.class); + applyMethod.setAccessible(true); + applyMethod.invoke(null, tb, props); + assertEquals("eu-central-1", tb.regionVal.id()); + assertEquals(new URI("https://async.example:9000"), tb.endpointVal); + } +} diff --git a/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorTest.java b/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorTest.java new file mode 100644 index 0000000000..ae4da15b0a --- /dev/null +++ b/runtime/service/src/test/java/org/apache/polaris/service/catalog/io/s3/ReflectionS3ClientInjectorTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.service.catalog.io.s3; + +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.Supplier; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; + +/** Unit tests for ReflectionS3ClientInjector. */ +public class ReflectionS3ClientInjectorTest { + + private static class DummyS3FileIO { + // Simulate Iceberg's SerializableSupplier and SerializableSupplier + public Supplier s3; + public Supplier s3Async; + } + + @Test + public void testInjectSupplierIntoS3FileIO_replacesSuppliers() { + DummyS3FileIO fileIO = new DummyS3FileIO(); + + // Build simple clients with default builders (they won't be used to actually call network) + S3Client prebuilt = S3Client.builder().build(); + S3AsyncClient prebuiltAsync = S3AsyncClient.builder().build(); + + boolean injected = + ReflectionS3ClientInjector.injectSupplierIntoS3FileIO(fileIO, prebuilt, prebuiltAsync); + assertTrue(injected, "Expected supplier injection to return true"); + + // The injected suppliers should return the prebuilt instances + Supplier s3Supplier = fileIO.s3; + Supplier s3AsyncSupplier = fileIO.s3Async; + + assertSame(prebuilt, s3Supplier.get()); + assertSame(prebuiltAsync, s3AsyncSupplier.get()); + } +} diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 59baaf99d8..dbef556159 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -1139,6 +1139,14 @@ components: Whether S3 requests to files in this catalog should use 'path-style addressing for buckets'. example: true default: false + ignoreSSLVerification: + type: boolean + description: >- + Whether SSL certificate verification should be disabled for STS and S3 endpoints (optional). + WARNING: This should only be used for development and testing environments with self-signed certificates. + Disabling SSL verification in production environments compromises security. + example: false + default: false AzureStorageConfigInfo: type: object