From 2a11fd8ae47cbc51fc87a5d019099528266f0f0c Mon Sep 17 00:00:00 2001 From: awstools Date: Thu, 29 Jun 2023 19:15:00 +0000 Subject: [PATCH] feat(client-glue): This release adds support for AWS Glue Crawler with Iceberg Tables, allowing Crawlers to discover Iceberg Tables in S3 and register them in Glue Data Catalog for query engines to query against. --- .../src/commands/BatchGetCrawlersCommand.ts | 12 ++++ .../src/commands/CreateCrawlerCommand.ts | 12 ++++ .../src/commands/GetCrawlerCommand.ts | 12 ++++ .../src/commands/GetCrawlersCommand.ts | 12 ++++ .../src/commands/UpdateCrawlerCommand.ts | 12 ++++ clients/client-glue/src/models/models_0.ts | 49 +++++++++----- clients/client-glue/src/models/models_1.ts | 66 +++++-------------- clients/client-glue/src/models/models_2.ts | 50 +++++++++++++- .../client-glue/src/protocols/Aws_json1_1.ts | 15 ++++- codegen/sdk-codegen/aws-models/glue.json | 44 +++++++++++++ 10 files changed, 216 insertions(+), 68 deletions(-) diff --git a/clients/client-glue/src/commands/BatchGetCrawlersCommand.ts b/clients/client-glue/src/commands/BatchGetCrawlersCommand.ts index 6293b66f40b8..f9aa233cfdbb 100644 --- a/clients/client-glue/src/commands/BatchGetCrawlersCommand.ts +++ b/clients/client-glue/src/commands/BatchGetCrawlersCommand.ts @@ -115,6 +115,18 @@ export interface BatchGetCrawlersCommandOutput extends BatchGetCrawlersResponse, * // CreateNativeDeltaTable: true || false, * // }, * // ], + * // IcebergTargets: [ // IcebergTargetList + * // { // IcebergTarget + * // Paths: [ + * // "STRING_VALUE", + * // ], + * // ConnectionName: "STRING_VALUE", + * // Exclusions: [ + * // "STRING_VALUE", + * // ], + * // MaximumTraversalDepth: Number("int"), + * // }, + * // ], * // }, * // DatabaseName: "STRING_VALUE", * // Description: "STRING_VALUE", diff --git a/clients/client-glue/src/commands/CreateCrawlerCommand.ts b/clients/client-glue/src/commands/CreateCrawlerCommand.ts index b383121adb21..f412b8393448 100644 --- 
a/clients/client-glue/src/commands/CreateCrawlerCommand.ts +++ b/clients/client-glue/src/commands/CreateCrawlerCommand.ts @@ -110,6 +110,18 @@ export interface CreateCrawlerCommandOutput extends CreateCrawlerResponse, __Met * CreateNativeDeltaTable: true || false, * }, * ], + * IcebergTargets: [ // IcebergTargetList + * { // IcebergTarget + * Paths: [ + * "STRING_VALUE", + * ], + * ConnectionName: "STRING_VALUE", + * Exclusions: [ + * "STRING_VALUE", + * ], + * MaximumTraversalDepth: Number("int"), + * }, + * ], * }, * Schedule: "STRING_VALUE", * Classifiers: [ // ClassifierNameList diff --git a/clients/client-glue/src/commands/GetCrawlerCommand.ts b/clients/client-glue/src/commands/GetCrawlerCommand.ts index cc1a24b858ba..62596ebffa4f 100644 --- a/clients/client-glue/src/commands/GetCrawlerCommand.ts +++ b/clients/client-glue/src/commands/GetCrawlerCommand.ts @@ -112,6 +112,18 @@ export interface GetCrawlerCommandOutput extends GetCrawlerResponse, __MetadataB * // CreateNativeDeltaTable: true || false, * // }, * // ], + * // IcebergTargets: [ // IcebergTargetList + * // { // IcebergTarget + * // Paths: [ + * // "STRING_VALUE", + * // ], + * // ConnectionName: "STRING_VALUE", + * // Exclusions: [ + * // "STRING_VALUE", + * // ], + * // MaximumTraversalDepth: Number("int"), + * // }, + * // ], * // }, * // DatabaseName: "STRING_VALUE", * // Description: "STRING_VALUE", diff --git a/clients/client-glue/src/commands/GetCrawlersCommand.ts b/clients/client-glue/src/commands/GetCrawlersCommand.ts index 224eefff4677..6166e1fc5df8 100644 --- a/clients/client-glue/src/commands/GetCrawlersCommand.ts +++ b/clients/client-glue/src/commands/GetCrawlersCommand.ts @@ -115,6 +115,18 @@ export interface GetCrawlersCommandOutput extends GetCrawlersResponse, __Metadat * // CreateNativeDeltaTable: true || false, * // }, * // ], + * // IcebergTargets: [ // IcebergTargetList + * // { // IcebergTarget + * // Paths: [ + * // "STRING_VALUE", + * // ], + * // ConnectionName: "STRING_VALUE", 
+ * // Exclusions: [ + * // "STRING_VALUE", + * // ], + * // MaximumTraversalDepth: Number("int"), + * // }, + * // ], * // }, * // DatabaseName: "STRING_VALUE", * // Description: "STRING_VALUE", diff --git a/clients/client-glue/src/commands/UpdateCrawlerCommand.ts b/clients/client-glue/src/commands/UpdateCrawlerCommand.ts index 58004cae8d54..103d1521145e 100644 --- a/clients/client-glue/src/commands/UpdateCrawlerCommand.ts +++ b/clients/client-glue/src/commands/UpdateCrawlerCommand.ts @@ -110,6 +110,18 @@ export interface UpdateCrawlerCommandOutput extends UpdateCrawlerResponse, __Met * CreateNativeDeltaTable: true || false, * }, * ], + * IcebergTargets: [ // IcebergTargetList + * { // IcebergTarget + * Paths: [ + * "STRING_VALUE", + * ], + * ConnectionName: "STRING_VALUE", + * Exclusions: [ + * "STRING_VALUE", + * ], + * MaximumTraversalDepth: Number("int"), + * }, + * ], * }, * Schedule: "STRING_VALUE", * Classifiers: [ // ClassifierNameList diff --git a/clients/client-glue/src/models/models_0.ts b/clients/client-glue/src/models/models_0.ts index 621b35becd0a..fd24080aefd2 100644 --- a/clients/client-glue/src/models/models_0.ts +++ b/clients/client-glue/src/models/models_0.ts @@ -1658,6 +1658,33 @@ export interface DynamoDBTarget { scanRate?: number; } +/** + * @public + *

Specifies an Apache Iceberg data source where Iceberg tables are stored in Amazon S3.

+ */ +export interface IcebergTarget { + /** + *

One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.

+ */ + Paths?: string[]; + + /** + *

The name of the connection to use to connect to the Iceberg target.

+ */ + ConnectionName?: string; + + /** + *

A list of glob patterns used to exclude from the crawl. + * For more information, see Catalog Tables with a Crawler.

+ */ + Exclusions?: string[]; + + /** + *

The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time.

+ */ + MaximumTraversalDepth?: number; +} + /** * @public * @enum @@ -1793,6 +1820,11 @@ export interface CrawlerTargets { *

Specifies Delta data store targets.

*/ DeltaTargets?: DeltaTarget[]; + + /** + *

Specifies Apache Iceberg data store targets.

+ */ + IcebergTargets?: IcebergTarget[]; } /** @@ -8199,20 +8231,3 @@ export interface MLUserDataEncryption { */ KmsKeyId?: string; } - -/** - * @public - *

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

- *

Additionally, imported labels and trained transforms can now be encrypted using a customer provided KMS key.

- */ -export interface TransformEncryption { - /** - *

An MLUserDataEncryption object containing the encryption mode and customer-provided KMS key ID.

- */ - MlUserDataEncryption?: MLUserDataEncryption; - - /** - *

The name of the security configuration.

- */ - TaskRunSecurityConfigurationName?: string; -} diff --git a/clients/client-glue/src/models/models_1.ts b/clients/client-glue/src/models/models_1.ts index a02c77228e13..8122adedd4a4 100644 --- a/clients/client-glue/src/models/models_1.ts +++ b/clients/client-glue/src/models/models_1.ts @@ -22,6 +22,7 @@ import { FederatedDatabase, GlueTable, JobRun, + MLUserDataEncryption, Partition, PartitionInput, PartitionValueList, @@ -31,7 +32,6 @@ import { SchemaId, StorageDescriptor, TaskStatusType, - TransformEncryption, TransformParameters, TransformType, Trigger, @@ -39,6 +39,23 @@ import { WorkerType, } from "./models_0"; +/** + * @public + *

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

+ *

Additionally, imported labels and trained transforms can now be encrypted using a customer-provided KMS key.

+ */ +export interface TransformEncryption { + /** + *

An MLUserDataEncryption object containing the encryption mode and customer-provided KMS key ID.

+ */ + MlUserDataEncryption?: MLUserDataEncryption; + + /** + *

The name of the security configuration.

+ */ + TaskRunSecurityConfigurationName?: string; +} + /** * @public */ @@ -7215,50 +7232,3 @@ export interface GetUserDefinedFunctionRequest { */ FunctionName: string | undefined; } - -/** - * @public - *

Represents the equivalent of a Hive user-defined function - * (UDF) definition.

- */ -export interface UserDefinedFunction { - /** - *

The name of the function.

- */ - FunctionName?: string; - - /** - *

The name of the catalog database that contains the function.

- */ - DatabaseName?: string; - - /** - *

The Java class that contains the function code.

- */ - ClassName?: string; - - /** - *

The owner of the function.

- */ - OwnerName?: string; - - /** - *

The owner type.

- */ - OwnerType?: PrincipalType | string; - - /** - *

The time at which the function was created.

- */ - CreateTime?: Date; - - /** - *

The resource URIs for the function.

- */ - ResourceUris?: ResourceUri[]; - - /** - *

The ID of the Data Catalog in which the function resides.

- */ - CatalogId?: string; -} diff --git a/clients/client-glue/src/models/models_2.ts b/clients/client-glue/src/models/models_2.ts index ff73eb7977d7..b5b56c258cdf 100644 --- a/clients/client-glue/src/models/models_2.ts +++ b/clients/client-glue/src/models/models_2.ts @@ -106,9 +106,11 @@ import { DataCatalogEncryptionSettings, DataQualityEvaluationRunAdditionalRunOptions, JobBookmarkEntry, + PrincipalType, RegistryId, RegistryStatus, ResourceShareType, + ResourceUri, SchemaStatus, SchemaVersionNumber, SchemaVersionStatus, @@ -118,10 +120,56 @@ import { TableInput, TransformFilterCriteria, TransformSortCriteria, - UserDefinedFunction, UserDefinedFunctionInput, } from "./models_1"; +/** + * @public + *

Represents the equivalent of a Hive user-defined function + * (UDF) definition.

+ */ +export interface UserDefinedFunction { + /** + *

The name of the function.

+ */ + FunctionName?: string; + + /** + *

The name of the catalog database that contains the function.

+ */ + DatabaseName?: string; + + /** + *

The Java class that contains the function code.

+ */ + ClassName?: string; + + /** + *

The owner of the function.

+ */ + OwnerName?: string; + + /** + *

The owner type.

+ */ + OwnerType?: PrincipalType | string; + + /** + *

The time at which the function was created.

+ */ + CreateTime?: Date; + + /** + *

The resource URIs for the function.

+ */ + ResourceUris?: ResourceUri[]; + + /** + *

The ID of the Data Catalog in which the function resides.

+ */ + CatalogId?: string; +} + /** * @public */ diff --git a/clients/client-glue/src/protocols/Aws_json1_1.ts b/clients/client-glue/src/protocols/Aws_json1_1.ts index ffaecac623cd..17c34b40ae93 100644 --- a/clients/client-glue/src/protocols/Aws_json1_1.ts +++ b/clients/client-glue/src/protocols/Aws_json1_1.ts @@ -561,6 +561,7 @@ import { GlueTable, GovernedCatalogSource, GovernedCatalogTarget, + IcebergTarget, IdempotentParameterMismatchException, IllegalSessionStateException, InternalServiceException, @@ -650,7 +651,6 @@ import { StorageDescriptor, StreamingDataPreviewOptions, TransformConfigParameter, - TransformEncryption, TransformParameters, Union, UpsertRedshiftTargetOptions, @@ -863,10 +863,10 @@ import { TaskRun, TaskRunFilterCriteria, TaskRunSortCriteria, + TransformEncryption, TransformFilterCriteria, TransformSortCriteria, UnfilteredPartition, - UserDefinedFunction, UserDefinedFunctionInput, XMLClassifier, } from "../models/models_1"; @@ -999,6 +999,7 @@ import { UpdateUserDefinedFunctionRequest, UpdateWorkflowRequest, UpdateXMLClassifierRequest, + UserDefinedFunction, VersionMismatchException, } from "../models/models_2"; @@ -15959,6 +15960,7 @@ const se_CrawlerTargets = (input: CrawlerTargets, context: __SerdeContext): any CatalogTargets: _json, DeltaTargets: _json, DynamoDBTargets: (_) => se_DynamoDBTargetList(_, context), + IcebergTargets: _json, JdbcTargets: _json, MongoDBTargets: _json, S3Targets: _json, @@ -16654,6 +16656,10 @@ const se_GetTablesRequest = (input: GetTablesRequest, context: __SerdeContext): // se_GovernedCatalogTarget omitted. +// se_IcebergTarget omitted. + +// se_IcebergTargetList omitted. + // se_ImportCatalogToGlueRequest omitted. // se_JDBCConnectorOptions omitted. 
@@ -18199,6 +18205,7 @@ const de_CrawlerTargets = (output: any, context: __SerdeContext): CrawlerTargets CatalogTargets: _json, DeltaTargets: _json, DynamoDBTargets: (_: any) => de_DynamoDBTargetList(_, context), + IcebergTargets: _json, JdbcTargets: _json, MongoDBTargets: _json, S3Targets: _json, @@ -19553,6 +19560,10 @@ const de_GrokClassifier = (output: any, context: __SerdeContext): GrokClassifier }) as any; }; +// de_IcebergTarget omitted. + +// de_IcebergTargetList omitted. + // de_IdempotentParameterMismatchException omitted. // de_IllegalBlueprintStateException omitted. diff --git a/codegen/sdk-codegen/aws-models/glue.json b/codegen/sdk-codegen/aws-models/glue.json index 447d85d0dc30..4f0eb459bbdd 100644 --- a/codegen/sdk-codegen/aws-models/glue.json +++ b/codegen/sdk-codegen/aws-models/glue.json @@ -6980,6 +6980,12 @@ "traits": { "smithy.api#documentation": "

Specifies Delta data store targets.

" } + }, + "IcebergTargets": { + "target": "com.amazonaws.glue#IcebergTargetList", + "traits": { + "smithy.api#documentation": "

Specifies Apache Iceberg data store targets.

" + } } }, "traits": { @@ -19758,6 +19764,44 @@ } } }, + "com.amazonaws.glue#IcebergTarget": { + "type": "structure", + "members": { + "Paths": { + "target": "com.amazonaws.glue#PathList", + "traits": { + "smithy.api#documentation": "

One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.

" + } + }, + "ConnectionName": { + "target": "com.amazonaws.glue#ConnectionName", + "traits": { + "smithy.api#documentation": "

The name of the connection to use to connect to the Iceberg target.

" + } + }, + "Exclusions": { + "target": "com.amazonaws.glue#PathList", + "traits": { + "smithy.api#documentation": "

A list of glob patterns used to exclude from the crawl.\n For more information, see Catalog Tables with a Crawler.

" + } + }, + "MaximumTraversalDepth": { + "target": "com.amazonaws.glue#NullableInteger", + "traits": { + "smithy.api#documentation": "

The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time.

" + } + } + }, + "traits": { + "smithy.api#documentation": "

Specifies an Apache Iceberg data source where Iceberg tables are stored in Amazon S3.

" + } + }, + "com.amazonaws.glue#IcebergTargetList": { + "type": "list", + "member": { + "target": "com.amazonaws.glue#IcebergTarget" + } + }, "com.amazonaws.glue#IdString": { "type": "string", "traits": {