From f7546115099bd36c26e57864a50973cc515de360 Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Fri, 24 May 2024 14:15:03 +0800 Subject: [PATCH] [DSIP-42] Add dolphinscheduler-aws-authentication module (#16043) --- deploy/kubernetes/dolphinscheduler/README.md | 11 +- .../dolphinscheduler/templates/configmap.yaml | 2 +- .../kubernetes/dolphinscheduler/values.yaml | 21 ++- docs/docs/en/architecture/configuration.md | 22 ++- docs/docs/en/guide/resource/configuration.md | 160 ++++-------------- docs/docs/en/guide/task/dms.md | 17 +- docs/docs/en/guide/task/sagemaker.md | 17 +- docs/docs/zh/guide/resource/configuration.md | 157 +++-------------- docs/docs/zh/guide/task/dms.md | 17 +- docs/docs/zh/guide/task/sagemaker.md | 17 +- .../dolphinscheduler-alert-server.xml | 7 + .../assembly/dolphinscheduler-api-server.xml | 7 + .../pom.xml | 64 +++++++ .../aws/AWSCredentialsProviderFactor.java | 74 ++++++++ .../aws/AWSCredentialsProviderType.java | 49 ++++++ ...DatabaseMigrationServiceClientFactory.java | 53 ++++++ .../AmazonElasticMapReduceClientFactory.java | 53 ++++++ .../aws/AmazonS3ClientFactory.java | 54 ++++++ .../aws/AmazonSageMakerClientFactory.java | 53 ++++++ .../aws/AwsConfigurationKeys.java | 28 +++ .../aws/DataSyncClientFactory.java | 47 +++++ .../src/main/resources/aws.yaml | 65 +++++++ dolphinscheduler-authentication/pom.xml | 46 +++++ dolphinscheduler-common/pom.xml | 4 + .../common/constants/Constants.java | 15 +- .../common/log/remote/S3RemoteLogHandler.java | 60 +------ .../common/utils/PropertyUtils.java | 16 +- .../src/main/resources/common.properties | 22 +-- .../src/main/resources/remote-logging.yaml | 18 +- .../common/utils/PropertyUtilsTest.java | 13 ++ .../src/test/resources/common.properties | 6 +- .../resources/docker/file-manage/aws.yaml | 65 +++++++ .../docker/file-manage/common.properties | 11 -- .../docker/file-manage/docker-compose.yaml | 1 + .../dolphinscheduler-master-server.xml | 7 + .../dolphinscheduler-standalone-server.xml | 7 + .../plugin/storage/s3/S3StorageOperator.java | 53 +----- .../storage/s3/S3StorageOperatorTest.java | 9 - .../src/test/resources/common.properties | 11 -- .../dolphinscheduler-task-datasync/pom.xml | 8 +- .../plugin/task/datasync/DatasyncHook.java | 19 +-- .../dolphinscheduler-task-dms/pom.xml | 8 +- .../plugin/task/dms/DmsHook.java | 20 +-- .../dolphinscheduler-task-emr/pom.xml | 9 +- .../plugin/task/emr/AbstractEmrTask.java | 25 +-- .../dolphinscheduler-task-sagemaker/pom.xml | 11 +- .../plugin/task/sagemaker/SagemakerTask.java | 18 +- .../dolphinscheduler-worker-server.xml | 7 + pom.xml | 7 + 49 files changed, 899 insertions(+), 592 deletions(-) create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java create mode 100644 dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml create mode 100644 dolphinscheduler-authentication/pom.xml create mode 100644 dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml diff --git a/deploy/kubernetes/dolphinscheduler/README.md b/deploy/kubernetes/dolphinscheduler/README.md index c5e75ca938..209e4e3942 100644 --- a/deploy/kubernetes/dolphinscheduler/README.md +++ b/deploy/kubernetes/dolphinscheduler/README.md @@ -120,6 +120,12 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | conf.auto | bool | `false` | auto restart, if true, all components will be restarted automatically after the common configuration is updated. if false, you need to restart the components manually. default is false | | conf.common."alert.rpc.port" | int | `50052` | rpc port | | conf.common."appId.collect" | string | `"log"` | way to collect applicationId: log, aop | +| conf.common."aws.credentials.provider.type" | string | `"AWSStaticCredentialsProvider"` | | +| conf.common."aws.s3.access.key.id" | string | `"minioadmin"` | The AWS access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required | +| conf.common."aws.s3.access.key.secret" | string | `"minioadmin"` | The AWS secret access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required | +| conf.common."aws.s3.bucket.name" | string | `"dolphinscheduler"` | The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. | +| conf.common."aws.s3.endpoint" | string | `"http://minio:9000"` | You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn | +| conf.common."aws.s3.region" | string | `"ca-central-1"` | The AWS Region to use. if resource.storage.type=S3, This configuration is required | | conf.common."conda.path" | string | `"/opt/anaconda3/etc/profile.d/conda.sh"` | set path of conda.sh | | conf.common."data-quality.jar.dir" | string | `nil` | data quality option | | conf.common."data.basedir.path" | string | `"/tmp/dolphinscheduler"` | user data local directory path, please make sure the directory exists and have read write permissions | @@ -138,11 +144,6 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | conf.common."resource.alibaba.cloud.oss.bucket.name" | string | `"dolphinscheduler"` | oss bucket name, required if you set resource.storage.type=OSS | | conf.common."resource.alibaba.cloud.oss.endpoint" | string | `"https://oss-cn-hangzhou.aliyuncs.com"` | oss bucket endpoint, required if you set resource.storage.type=OSS | | conf.common."resource.alibaba.cloud.region" | string | `"cn-hangzhou"` | alibaba cloud region, required if you set resource.storage.type=OSS | -| conf.common."resource.aws.access.key.id" | string | `"minioadmin"` | The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required | -| conf.common."resource.aws.region" | string | `"ca-central-1"` | The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required | -| conf.common."resource.aws.s3.bucket.name" | string | `"dolphinscheduler"` | The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. | -| conf.common."resource.aws.s3.endpoint" | string | `"http://minio:9000"` | You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn | -| conf.common."resource.aws.secret.access.key" | string | `"minioadmin"` | The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required | | conf.common."resource.azure.client.id" | string | `"minioadmin"` | azure storage account name, required if you set resource.storage.type=ABS | | conf.common."resource.azure.client.secret" | string | `"minioadmin"` | azure storage account key, required if you set resource.storage.type=ABS | | conf.common."resource.azure.subId" | string | `"minioadmin"` | azure storage subId, required if you set resource.storage.type=ABS | diff --git a/deploy/kubernetes/dolphinscheduler/templates/configmap.yaml b/deploy/kubernetes/dolphinscheduler/templates/configmap.yaml index c9af1c00a5..8c1d515ca7 100644 --- a/deploy/kubernetes/dolphinscheduler/templates/configmap.yaml +++ b/deploy/kubernetes/dolphinscheduler/templates/configmap.yaml @@ -37,7 +37,7 @@ data: {{- range $key, $value := index .Values.conf "common" }} {{- if and $.Values.minio.enabled }} {{- if eq $key "resource.storage.type" }}{{ $value = "S3" }}{{- end }} - {{- if eq $key "resource.aws.s3.endpoint" }}{{ $value = print "http://" (include "dolphinscheduler.minio.fullname" $) ":9000" }}{{- end }} + {{- if eq $key "aws.s3.endpoint" }}{{ $value = print "http://" (include "dolphinscheduler.minio.fullname" $) ":9000" }}{{- end }} {{- end }} {{ $key }}={{ $value }} {{- end }} diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 61b6f0def2..3481d600fe 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -250,20 +250,25 @@ conf: # -- resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended resource.storage.upload.base.path: /dolphinscheduler - # -- The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required - resource.aws.access.key.id: minioadmin + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + aws.credentials.provider.type: AWSStaticCredentialsProvider - # -- The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required - resource.aws.secret.access.key: minioadmin + # -- The AWS access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required + aws.s3.access.key.id: minioadmin - # -- The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required - resource.aws.region: ca-central-1 + # -- The AWS secret access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required + aws.s3.access.key.secret: minioadmin + + # -- The AWS Region to use. if resource.storage.type=S3, This configuration is required + aws.s3.region: ca-central-1 # -- The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. - resource.aws.s3.bucket.name: dolphinscheduler + aws.s3.bucket.name: dolphinscheduler # -- You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn - resource.aws.s3.endpoint: http://minio:9000 + aws.s3.endpoint: http://minio:9000 # -- alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id: diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index cc3bc94fc0..a490e47d89 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -191,19 +191,21 @@ The default configuration is as follows: Note that DolphinScheduler also supports zookeeper related configuration through `bin/env/dolphinscheduler_env.sh`. -For ETCD Registry, please see more details on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-etcd/README.md). -For JDBC Registry, please see more details on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-jdbc/README.md). +For ETCD Registry, please see more details +on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-etcd/README.md). +For JDBC Registry, please see more details +on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-jdbc/README.md). ### common.properties [hadoop、s3、yarn config properties] Currently, common.properties mainly configures Hadoop,s3a related configurations. Configuration file location: -| Service | Configuration file | -|---------------|----------------------------------------| -| Master Server | `master-server/conf/common.properties` | -| Api Server | `api-server/conf/common.properties` | -| Worker Server | `worker-server/conf/common.properties` | -| Alert Server | `alert-server/conf/common.properties` | +| Service | Configuration file | +|---------------|-----------------------------------------------------------------------| +| Master Server | `master-server/conf/common.properties` | +| Api Server | `api-server/conf/common.properties`, `api-server/conf/aws.yaml` | +| Worker Server | `worker-server/conf/common.properties`, `worker-server/conf/aws.yaml` | +| Alert Server | `alert-server/conf/common.properties` | The default configuration is as follows: @@ -212,10 +214,6 @@ The default configuration is as follows: | data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files | | resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE | | resource.upload.path | /dolphinscheduler | storage path of resource files | -| aws.access.key.id | minioadmin | access key id of S3 | -| aws.secret.access.key | minioadmin | secret access key of S3 | -| aws.region | us-east-1 | region of S3 | -| aws.s3.endpoint | http://minio:9000 | endpoint of S3 | | hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS | | fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory | | hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission | diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 6bee9e5a67..1c5b389302 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -28,74 +28,37 @@ The configuration you may need to change: ## connect AWS S3 -if you want to upload resources to `Resource Center` connected to `S3`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following: +if you want to upload resources to `Resource Center` connected to `S3`, you need to configure `api-server/conf/common.properties`, `api-server/conf/aws.yaml` and `worker-server/conf/common.properties`, `worker-server/conf/aws.yaml`. You can refer to the following: config the following fields ```properties -...... resource.storage.type=S3 +``` -...... - -resource.aws.access.key.id=aws_access_key_id -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=aws_secret_access_key -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=us-west-2 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s4. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint= +```yaml +aws: + s3: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + bucket.name: + endpoint: -...... ``` -## Use HDFS or Remote Object Storage +## connect OSS S3 -After version 3.0.0-alpha, if you want to upload resources to `Resource Center` connected to `HDFS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. +if you want to upload resources to `Resource Center` connected to `OSS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following: -```properties -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# user data local directory path, please make sure the directory exists and have read write permissions -data.basedir.path=/tmp/dolphinscheduler - -# resource view suffixs -#resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js - -# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS, OBS -resource.storage.type=LOCAL -# resource store on HDFS/S3/OSS path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended -resource.storage.upload.base.path=/tmp/dolphinscheduler - -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id=minioadmin -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=minioadmin -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=cn-north-1 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://localhost:9000 +config the following fields +```properties # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= # alibaba cloud access key secret, required if you set resource.storage.type=OSS @@ -107,89 +70,24 @@ resource.alibaba.cloud.oss.bucket.name=dolphinscheduler # oss bucket endpoint, required if you set resource.storage.type=OSS resource.alibaba.cloud.oss.endpoint=https://oss-cn-hangzhou.aliyuncs.com -# alibaba cloud access key id, required if you set resource.storage.type=OBS +``` + +## connect OBS S3 + +if you want to upload resources to `Resource Center` connected to `OBS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following: + +config the following fields + +```properties +# access key id, required if you set resource.storage.type=OBS resource.huawei.cloud.access.key.id= -# alibaba cloud access key secret, required if you set resource.storage.type=OBS +# access key secret, required if you set resource.storage.type=OBS resource.huawei.cloud.access.key.secret= # oss bucket name, required if you set resource.storage.type=OBS resource.huawei.cloud.obs.bucket.name=dolphinscheduler # oss bucket endpoint, required if you set resource.storage.type=OBS resource.huawei.cloud.obs.endpoint=obs.cn-southwest-2.huaweicloud.com -# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path -resource.hdfs.root.user=hdfs -# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir -resource.hdfs.fs.defaultFS=hdfs://mycluster:8020 - -# whether to startup kerberos -hadoop.security.authentication.startup.state=false - -# java.security.krb5.conf path -java.security.krb5.conf.path=/opt/krb5.conf - -# login user from keytab username -login.user.keytab.username=hdfs-mycluster@ESZ.COM - -# login user from keytab path -login.user.keytab.path=/opt/hdfs.headless.keytab - -# kerberos expire time, the unit is hour -kerberos.expire.time=2 - - -# resourcemanager port, the default value is 8088 if not specified -resource.manager.httpaddress.port=8088 -# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty -yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx -# if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname -yarn.application.status.address=http://ds1:%s/ws/v1/cluster/apps/%s -# job history status url when application number threshold is reached(default 10000, maybe it was set to 1000) -yarn.job.history.status.address=http://ds1:19888/ws/v1/history/mapreduce/jobs/%s - -# datasource encryption enable -datasource.encryption.enable=false - -# datasource encryption salt -datasource.encryption.salt=!@#$%^&* - -# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in -# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server -# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). -data-quality.jar.dir= - -#data-quality.error.output.path=/tmp/data-quality-error-data - -# Network IP gets priority, default inner outer - -# Whether hive SQL is executed in the same session -support.hive.oneSession=false - -# use sudo or not, if set true, executing user is tenant user and deploy user needs sudo permissions; if set false, executing user is the deploy user and doesn't need sudo permissions -sudo.enable=true - -# network interface preferred like eth0, default: empty -#dolphin.scheduler.network.interface.preferred= - -# network IP gets priority, default: inner outer -#dolphin.scheduler.network.priority.strategy=default - -# system env path -#dolphinscheduler.env.path=dolphinscheduler_env.sh - -# development state -development.state=false - -# rpc port -alert.rpc.port=50052 - -# set path of conda.sh -conda.path=/opt/anaconda3/etc/profile.d/conda.sh - -# Task resource limit state -task.resource.limit.state=false - -# way to collect applicationId: log(original regex match), aop -appId.collect: log ``` > **Note:** diff --git a/docs/docs/en/guide/task/dms.md b/docs/docs/en/guide/task/dms.md index a19ec4ba4b..89cf2ef4db 100644 --- a/docs/docs/en/guide/task/dms.md +++ b/docs/docs/en/guide/task/dms.md @@ -73,14 +73,17 @@ Parameters of restarting the task by interface ## Environment to prepare -Some AWS configuration is required, modify a field in file `common.properties` +Some AWS configuration is required, modify a field in file `aws.yaml` ```yaml -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id= -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key= -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region= +dms: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + endpoint: ``` diff --git a/docs/docs/en/guide/task/sagemaker.md b/docs/docs/en/guide/task/sagemaker.md index 7782377130..b520dd33cc 100644 --- a/docs/docs/en/guide/task/sagemaker.md +++ b/docs/docs/en/guide/task/sagemaker.md @@ -35,14 +35,17 @@ The task plugin are shown as follows: ## Environment to prepare -Some AWS configuration is required, modify a field in file `common.properties` +Some AWS configuration is required, modify a field in file `aws.yaml` ```yaml -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id= -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key= -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region= +sagemaker: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + endpoint: ``` diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index 19a34c577c..735c75da97 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -26,77 +26,35 @@ Dolphinscheduler 资源中心使用本地系统默认是开启的,不需要用 ## 对接AWS S3 -如果需要使用到资源中心的 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下: +如果需要使用到资源中心的 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties`, `api-server/conf/aws.yaml` 和 `worker-server/conf/common.properties`, `worker-server/conf/aws.yaml`。可参考如下: 配置以下字段 ```properties -...... resource.storage.type=S3 - -...... - -resource.aws.access.key.id=aws_access_key_id -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=aws_secret_access_key -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=us-west-2 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s4. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint= - -...... ``` -## 对接分布式或远端对象存储 +```yaml +aws: + s3: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + bucket.name: + endpoint: -当需要使用资源中心进行相关文件的创建或者上传操作时,所有的文件和资源都会被存储在分布式文件系统`HDFS`或者远端的对象存储,如`S3`上。所以需要进行以下配置: +``` -### 配置 common.properties 文件 +## 对接阿里云 OSS -在 3.0.0-alpha 版本之后,如果需要使用到资源中心的 HDFS 或 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下: +如果需要使用到资源中心的 OSS 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下: ```properties -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# user data local directory path, please make sure the directory exists and have read write permissions -data.basedir.path=/tmp/dolphinscheduler - -# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS, OBS -resource.storage.type=LOCAL - -# resource store on HDFS/S3/OSS path, resource file will store to this hadoop hdfs path, self configuration, -# please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended -resource.storage.upload.base.path=/tmp/dolphinscheduler - -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id=minioadmin -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=minioadmin -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=cn-north-1 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://localhost:9000 - # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= # alibaba cloud access key secret, required if you set resource.storage.type=OSS @@ -108,87 +66,22 @@ resource.alibaba.cloud.oss.bucket.name=dolphinscheduler # oss bucket endpoint, required if you set resource.storage.type=OSS resource.alibaba.cloud.oss.endpoint=https://oss-cn-hangzhou.aliyuncs.com -# alibaba cloud access key id, required if you set resource.storage.type=OBS +``` + +## 对接华为云 OBS + +如果需要使用到资源中心的 OBS 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下: + +```properties +# access key id, required if you set resource.storage.type=OBS resource.huawei.cloud.access.key.id= -# alibaba cloud access key secret, required if you set resource.storage.type=OBS +# access key secret, required if you set resource.storage.type=OBS resource.huawei.cloud.access.key.secret= # oss bucket name, required if you set resource.storage.type=OBS resource.huawei.cloud.obs.bucket.name=dolphinscheduler # oss bucket endpoint, required if you set resource.storage.type=OBS resource.huawei.cloud.obs.endpoint=obs.cn-southwest-2.huaweicloud.com -# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path -resource.hdfs.root.user=root -# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; -# if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir -resource.hdfs.fs.defaultFS=hdfs://localhost:8020 - -# whether to startup kerberos -hadoop.security.authentication.startup.state=false - -# java.security.krb5.conf path -java.security.krb5.conf.path=/opt/krb5.conf - -# login user from keytab username -login.user.keytab.username=hdfs-mycluster@ESZ.COM - -# login user from keytab path -login.user.keytab.path=/opt/hdfs.headless.keytab - -# kerberos expire time, the unit is hour -kerberos.expire.time=2 -# resource view suffixs -#resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js - -# resourcemanager port, the default value is 8088 if not specified -resource.manager.httpaddress.port=8088 -# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty -yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx -# if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; -# If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname -yarn.application.status.address=http://localhost:%s/ds/v1/cluster/apps/%s -# job history status url when application number threshold is reached(default 10000, maybe it was set to 1000) -yarn.job.history.status.address=http://localhost:19888/ds/v1/history/mapreduce/jobs/%s - -# datasource encryption enable -datasource.encryption.enable=false - -# datasource encryption salt -datasource.encryption.salt=!@#$%^&* - -# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in -# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server -# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). -data-quality.jar.dir= - -#data-quality.error.output.path=/tmp/data-quality-error-data - -# Network IP gets priority, default inner outer - -# Whether hive SQL is executed in the same session -support.hive.oneSession=false - -# use sudo or not, if set true, executing user is tenant user and deploy user needs sudo permissions; -# if set false, executing user is the deploy user and doesn't need sudo permissions -sudo.enable=true - -# network interface preferred like eth0, default: empty -#dolphin.scheduler.network.interface.preferred= - -# network IP gets priority, default: inner outer -#dolphin.scheduler.network.priority.strategy=default - -# system env path -#dolphinscheduler.env.path=env/dolphinscheduler_env.sh - -# development state -development.state=false - -# rpc port -alert.rpc.port=50052 - -# way to collect applicationId: log(original regex match), aop -appId.collect: log ``` > **注意**: diff --git a/docs/docs/zh/guide/task/dms.md b/docs/docs/zh/guide/task/dms.md index 6013f45aef..8a87a36f9a 100644 --- a/docs/docs/zh/guide/task/dms.md +++ b/docs/docs/zh/guide/task/dms.md @@ -73,14 +73,17 @@ DolphinScheduler 在 启动DMS 任务后,会跟中DMS任务状态,直至DMS ## 环境配置 -需要进行AWS的一些配置,修改`common.properties`中的以下配置信息 +需要进行AWS的一些配置,修改`aws.yml`中的以下配置信息 ```yaml -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id= -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key= -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region= +dms: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + endpoint: ``` diff --git a/docs/docs/zh/guide/task/sagemaker.md b/docs/docs/zh/guide/task/sagemaker.md index e4b4c61542..22927171e9 100644 --- a/docs/docs/zh/guide/task/sagemaker.md +++ b/docs/docs/zh/guide/task/sagemaker.md @@ -33,14 +33,17 @@ DolphinScheduler SageMaker 组件的功能: ## 环境配置 -需要进行AWS的一些配置,修改`common.properties`中的`xxxxx`为你的配置信息 +需要进行AWS的一些配置,修改`aws.yml`中的以下配置信息 ```yaml -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id= -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key= -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region= +sagemaker: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: + access.key.secret: + region: + endpoint: ``` diff --git a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml index bf28193b3f..24c8fb2f11 100644 --- a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml +++ b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml @@ -56,6 +56,13 @@ conf + + ${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources + + **/*.yaml + + conf + diff --git a/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml b/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml index c9fdab4d51..5453f8fb15 100644 --- a/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml +++ b/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml @@ -57,6 +57,13 @@ conf + + ${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources + + **/*.yaml + + conf + ${basedir}/../dolphinscheduler-ui/dist ./ui diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml new file mode 100644 index 0000000000..9bad537cd9 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml @@ -0,0 +1,64 @@ + + + + 4.0.0 + + org.apache.dolphinscheduler + dolphinscheduler-authentication + dev-SNAPSHOT + + + dolphinscheduler-aws-authentication + + + + + com.amazonaws + aws-java-sdk-emr + + + + com.amazonaws + aws-java-sdk-s3 + + + + com.amazonaws + aws-java-sdk-sagemaker + + + + com.amazonaws + aws-java-sdk-dms + + + + software.amazon.awssdk + datasync + + + + org.slf4j + slf4j-api + provided + + + + + diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java new file mode 100644 index 0000000000..c79c70d8c2 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import static org.apache.dolphinscheduler.authentication.aws.AwsConfigurationKeys.AWS_AUTHENTICATION_TYPE; + +import java.util.Map; + +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; + +@Slf4j +@UtilityClass +public class AWSCredentialsProviderFactor { + + public static AWSCredentialsProvider credentialsProvider(Map awsProperties) { + String awsAuthenticationType = awsProperties.getOrDefault( + AWS_AUTHENTICATION_TYPE, AWSCredentialsProviderType.STATIC_CREDENTIALS_PROVIDER.getName()); + AWSCredentialsProviderType awsCredentialsProviderType = + AWSCredentialsProviderType.of(awsAuthenticationType).orElse(null); + if (awsCredentialsProviderType == null) { + throw new IllegalArgumentException( + "The aws.credentials.provider.type: " + awsAuthenticationType + " is invalidated"); + } + switch (awsCredentialsProviderType) { + case STATIC_CREDENTIALS_PROVIDER: + return createAWSStaticCredentialsProvider(awsProperties); + case INSTANCE_PROFILE_CREDENTIALS_PROVIDER: + return createInstanceProfileCredentialsProvider(); + default: + throw new IllegalArgumentException( + "The aws.credentials.provider.type: " + awsAuthenticationType + " is invalidated"); + } + + } + + private static AWSCredentialsProvider createAWSStaticCredentialsProvider(Map awsProperties) { + String awsAccessKeyId = awsProperties.get(AwsConfigurationKeys.AWS_ACCESS_KEY_ID); + String awsSecretAccessKey = awsProperties.get(AwsConfigurationKeys.AWS_SECRET); + final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey); + AWSStaticCredentialsProvider awsStaticCredentialsProvider = + new AWSStaticCredentialsProvider(basicAWSCredentials); + log.info("AWSStaticCredentialsProvider created successfully"); + return awsStaticCredentialsProvider; + } + + private static AWSCredentialsProvider createInstanceProfileCredentialsProvider() { + InstanceProfileCredentialsProvider instanceProfileCredentialsProvider = + InstanceProfileCredentialsProvider.getInstance(); + log.info("InstanceProfileCredentialsProvider created successfully"); + return instanceProfileCredentialsProvider; + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java new file mode 100644 index 0000000000..9b932f1554 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import java.util.Optional; + +import lombok.Getter; + +@Getter +public enum AWSCredentialsProviderType { + + STATIC_CREDENTIALS_PROVIDER("AWSStaticCredentialsProvider"), + INSTANCE_PROFILE_CREDENTIALS_PROVIDER("InstanceProfileCredentialsProvider"), + ; + + private final String name; + + AWSCredentialsProviderType(String name) { + this.name = name; + } + + public static Optional of(String name) { + if (name == null) { + return Optional.empty(); + } + for (AWSCredentialsProviderType type : values()) { + if (type.getName().equalsIgnoreCase(name)) { + return Optional.of(type); + } + } + return Optional.empty(); + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java new file mode 100644 index 0000000000..2056268f2b --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import java.util.Map; + +import lombok.experimental.UtilityClass; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationService; +import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationServiceClientBuilder; + +@UtilityClass +public class AWSDatabaseMigrationServiceClientFactory { + + public AWSDatabaseMigrationService createAWSDatabaseMigrationServiceClient(Map awsProperties) { + AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties); + Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION)); + String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT); + + if (endpoint != null && !endpoint.isEmpty()) { + return AWSDatabaseMigrationServiceClientBuilder + .standard() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName())) + .withCredentials(awsCredentialsProvider) + .build(); + } else { + return AWSDatabaseMigrationServiceClientBuilder + .standard() + .withCredentials(awsCredentialsProvider) + .withRegion(regions) + .build(); + } + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java new file mode 100644 index 0000000000..ea00473b79 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import java.util.Map; + +import lombok.experimental.UtilityClass; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce; +import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder; + +@UtilityClass +public class AmazonElasticMapReduceClientFactory { + + public AmazonElasticMapReduce createAmazonElasticMapReduceClient(Map awsProperties) { + AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties); + Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION)); + String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT); + + if (endpoint != null && !endpoint.isEmpty()) { + return AmazonElasticMapReduceClientBuilder + .standard() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName())) + .withCredentials(awsCredentialsProvider) + .build(); + } else { + return AmazonElasticMapReduceClientBuilder + .standard() + .withCredentials(awsCredentialsProvider) + .withRegion(regions) + .build(); + } + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java new file mode 100644 index 0000000000..c45e4de9ea --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import java.util.Map; + +import lombok.experimental.UtilityClass; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; + +@UtilityClass +public class AmazonS3ClientFactory { + + public AmazonS3 createAmazonS3Client(Map awsProperties) { + AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties); + Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION)); + String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT); + + if (endpoint != null && !endpoint.isEmpty()) { + return AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName())) + .withCredentials(awsCredentialsProvider) + .build(); + } else { + return AmazonS3ClientBuilder + .standard() + .withCredentials(awsCredentialsProvider) + .withRegion(regions) + .build(); + } + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java new file mode 100644 index 0000000000..6bff921894 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import java.util.Map; + +import lombok.experimental.UtilityClass; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.sagemaker.AmazonSageMaker; +import com.amazonaws.services.sagemaker.AmazonSageMakerClientBuilder; + +@UtilityClass +public class AmazonSageMakerClientFactory { + + public AmazonSageMaker createAmazonSageMakerClient(Map awsProperties) { + AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties); + Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION)); + String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT); + + if (endpoint != null && !endpoint.isEmpty()) { + return AmazonSageMakerClientBuilder + .standard() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName())) + .withCredentials(awsCredentialsProvider) + .build(); + } else { + return AmazonSageMakerClientBuilder + .standard() + .withCredentials(awsCredentialsProvider) + .withRegion(regions) + .build(); + } + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java new file mode 100644 index 0000000000..3d2d8677b3 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +public class AwsConfigurationKeys { + + public static final String AWS_AUTHENTICATION_TYPE = "credentials.provider.type"; + public static final String AWS_REGION = "region"; + public static final String AWS_ENDPOINT = "endpoint"; + + public static final String AWS_ACCESS_KEY_ID = "access.key.id"; + public static final String AWS_SECRET = "access.key.secret"; +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java new file mode 100644 index 0000000000..b67c7dd840 --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.authentication.aws; + +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.datasync.DataSyncClient; + +import java.util.Map; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class DataSyncClientFactory { + + public DataSyncClient createDataSyncClient(Map awsProperties) { + // todo: upgrade the version of aws sdk + String awsAccessKeyId = awsProperties.get(AwsConfigurationKeys.AWS_ACCESS_KEY_ID); + String awsSecretAccessKey = awsProperties.get(AwsConfigurationKeys.AWS_SECRET); + final AwsBasicCredentials basicAWSCredentials = AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey); + final AwsCredentialsProvider awsCredentialsProvider = StaticCredentialsProvider.create(basicAWSCredentials); + + // create a datasync client + return DataSyncClient.builder() + .region(Region.of(awsProperties.get(AwsConfigurationKeys.AWS_REGION))) + .credentialsProvider(awsCredentialsProvider) + .build(); + } + +} diff --git a/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml new file mode 100644 index 0000000000..6d453bb78a --- /dev/null +++ b/dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +aws: + s3: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: accessKey123 + access.key.secret: secretKey123 + region: us-east-1 + bucket.name: dolphinscheduler + endpoint: http://s3:9000 + emr: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + sagemaker: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + dms: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + datasync: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + diff --git a/dolphinscheduler-authentication/pom.xml b/dolphinscheduler-authentication/pom.xml new file mode 100644 index 0000000000..b49c4d37d0 --- /dev/null +++ b/dolphinscheduler-authentication/pom.xml @@ -0,0 +1,46 @@ + + + + 4.0.0 + + org.apache.dolphinscheduler + dolphinscheduler + dev-SNAPSHOT + + + dolphinscheduler-authentication + pom + + + dolphinscheduler-aws-authentication + + + + + + org.apache.dolphinscheduler + dolphinscheduler-bom + ${project.version} + pom + import + + + + + diff --git a/dolphinscheduler-common/pom.xml b/dolphinscheduler-common/pom.xml index eda9a72e30..6c83c580ae 100644 --- a/dolphinscheduler-common/pom.xml +++ b/dolphinscheduler-common/pom.xml @@ -41,6 +41,10 @@ + + org.apache.dolphinscheduler + dolphinscheduler-aws-authentication + commons-io commons-io diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java index cc07accc9b..0f12f11d00 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java @@ -36,6 +36,7 @@ public final class Constants { public static final String COMMON_PROPERTIES_PATH = "/common.properties"; public static final String REMOTE_LOGGING_YAML_PATH = "/remote-logging.yaml"; + public static final String AWS_YAML_PATH = "/aws.yaml"; public static final String FORMAT_SS = "%s%s"; public static final String FORMAT_S_S = "%s/%s"; @@ -130,8 +131,7 @@ public final class Constants { */ public static final String RESOURCE_STORAGE_TYPE = "resource.storage.type"; - public static final String AWS_S3_BUCKET_NAME = "resource.aws.s3.bucket.name"; - public static final String AWS_END_POINT = "resource.aws.s3.endpoint"; + public static final String AWS_S3_BUCKET_NAME = "aws.s3.bucket.name"; public static final String ALIBABA_CLOUD_OSS_BUCKET_NAME = "resource.alibaba.cloud.oss.bucket.name"; public static final String ALIBABA_CLOUD_OSS_END_POINT = "resource.alibaba.cloud.oss.endpoint"; @@ -704,19 +704,8 @@ public final class Constants { public static final String REMOTE_LOGGING_OSS_ENDPOINT = "remote.logging.oss.endpoint"; - /** - * remote logging for S3 - */ - public static final String REMOTE_LOGGING_S3_ACCESS_KEY_ID = "remote.logging.s3.access.key.id"; - - public static final String REMOTE_LOGGING_S3_ACCESS_KEY_SECRET = "remote.logging.s3.access.key.secret"; - public static final String REMOTE_LOGGING_S3_BUCKET_NAME = "remote.logging.s3.bucket.name"; - public static final String REMOTE_LOGGING_S3_ENDPOINT = "remote.logging.s3.endpoint"; - - public static final String REMOTE_LOGGING_S3_REGION = "remote.logging.s3.region"; - /** * remote logging for GCS */ diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java index 54dba2d5bd..4fef7b032a 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java @@ -17,6 +17,7 @@ package org.apache.dolphinscheduler.common.log.remote; +import org.apache.dolphinscheduler.authentication.aws.AmazonS3ClientFactory; import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.utils.PropertyUtils; @@ -26,41 +27,25 @@ import java.io.Closeable; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.util.Map; import lombok.extern.slf4j.Slf4j; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectInputStream; @Slf4j public class S3RemoteLogHandler implements RemoteLogHandler, Closeable { - private String accessKeyId; + private final String bucketName; - private String accessKeySecret; - - private String region; - - private String bucketName; - - private String endPoint; - - private AmazonS3 s3Client; + private final AmazonS3 s3Client; private static S3RemoteLogHandler instance; private S3RemoteLogHandler() { - accessKeyId = readAccessKeyID(); - accessKeySecret = readAccessKeySecret(); - region = readRegion(); bucketName = readBucketName(); - endPoint = readEndPoint(); s3Client = buildS3Client(); checkBucketNameExists(bucketName); } @@ -74,23 +59,8 @@ public class S3RemoteLogHandler implements RemoteLogHandler, Closeable { } protected AmazonS3 buildS3Client() { - if (StringUtils.isNotEmpty(endPoint)) { - return AmazonS3ClientBuilder - .standard() - .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration( - endPoint, Regions.fromName(region).getName())) - .withCredentials( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret))) - .build(); - } else { - return AmazonS3ClientBuilder - .standard() - .withCredentials( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret))) - .withRegion(Regions.fromName(region)) - .build(); - } + Map awsProperties = PropertyUtils.getByPrefix("aws.s3.", ""); + return AmazonS3ClientFactory.createAmazonS3Client(awsProperties); } @Override @@ -131,24 +101,8 @@ public class S3RemoteLogHandler implements RemoteLogHandler, Closeable { } } - protected String readAccessKeyID() { - return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ACCESS_KEY_ID); - } - - protected String readAccessKeySecret() { - return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ACCESS_KEY_SECRET); - } - - protected String readRegion() { - return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_REGION); - } - protected String readBucketName() { - return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_BUCKET_NAME); - } - - protected String readEndPoint() { - return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ENDPOINT); + return PropertyUtils.getString(Constants.AWS_S3_BUCKET_NAME); } public void checkBucketNameExists(String bucketName) { diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java index 82d4de9599..aee1589335 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java @@ -17,6 +17,7 @@ package org.apache.dolphinscheduler.common.utils; +import static org.apache.dolphinscheduler.common.constants.Constants.AWS_YAML_PATH; import static org.apache.dolphinscheduler.common.constants.Constants.COMMON_PROPERTIES_PATH; import static org.apache.dolphinscheduler.common.constants.Constants.REMOTE_LOGGING_YAML_PATH; @@ -42,7 +43,7 @@ public class PropertyUtils { private final ImmutablePriorityPropertyDelegate propertyDelegate = new ImmutablePriorityPropertyDelegate( new ImmutablePropertyDelegate(COMMON_PROPERTIES_PATH), - new ImmutableYamlDelegate(REMOTE_LOGGING_YAML_PATH)); + new ImmutableYamlDelegate(REMOTE_LOGGING_YAML_PATH, AWS_YAML_PATH)); public static String getString(String key) { return propertyDelegate.get(key.trim()); @@ -106,6 +107,19 @@ public class PropertyUtils { return matchedProperties; } + /** + * Get all properties with specified prefix, like: fs., will replace the prefix with newPrefix + */ + public static Map getByPrefix(String prefix, String newPrefix) { + Map matchedProperties = new HashMap<>(); + for (String propName : propertyDelegate.getPropertyKeys()) { + if (propName.startsWith(prefix)) { + matchedProperties.put(propName.replace(prefix, newPrefix), propertyDelegate.get(propName)); + } + } + return matchedProperties; + } + public static Set getSet(String key, Function> transformFunction, Set defaultValue) { return propertyDelegate.get(key, transformFunction, defaultValue); } diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index fdb553b4bc..cf1723700e 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -39,17 +39,6 @@ resource.azure.tenant.id=minioadmin # The query interval resource.query.interval=10000 -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id=minioadmin -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=minioadmin -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=cn-north-1 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://localhost:9000 - # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= # alibaba cloud access key secret, required if you set resource.storage.type=OSS @@ -188,16 +177,7 @@ remote.logging.oss.access.key.secret= remote.logging.oss.bucket.name= # oss endpoint, required if you set remote.logging.target=OSS remote.logging.oss.endpoint= -# s3 access key id, required if you set remote.logging.target=S3 -remote.logging.s3.access.key.id= -# s3 access key secret, required if you set remote.logging.target=S3 -remote.logging.s3.access.key.secret= -# s3 bucket name, required if you set remote.logging.target=S3 -remote.logging.s3.bucket.name= -# s3 endpoint, required if you set remote.logging.target=S3 -remote.logging.s3.endpoint= -# s3 region, required if you set remote.logging.target=S3 -remote.logging.s3.region= + # the location of the google cloud credential, required if you set remote.logging.target=GCS remote.logging.google.cloud.storage.credential=/path/to/credential # gcs bucket name, required if you set remote.logging.target=GCS diff --git a/dolphinscheduler-common/src/main/resources/remote-logging.yaml b/dolphinscheduler-common/src/main/resources/remote-logging.yaml index 2cb48750a4..f413958e64 100644 --- a/dolphinscheduler-common/src/main/resources/remote-logging.yaml +++ b/dolphinscheduler-common/src/main/resources/remote-logging.yaml @@ -34,28 +34,16 @@ remote-logging: bucket.name: # oss endpoint, required if you set remote-logging.target=OSS endpoint: - # required if you set remote-logging.target=S3 - s3: - # s3 access key id, required if you set remote-logging.target=S3 - access.key.id: - # s3 access key secret, required if you set remote-logging.target=S3 - access.key.secret: - # s3 bucket name, required if you set remote-logging.target=S3 - bucket.name: - # s3 endpoint, required if you set remote-logging.target=S3 - endpoint: - # s3 region, required if you set remote-logging.target=S3 - region: google.cloud.storage: # the location of the google cloud credential, required if you set remote-logging.target=GCS credential: /path/to/credential - # gcs bucket name, required if you set remote-logging.target=GCS + # gcs bucket name, required if you set remote-logging.target=GCS bucket.name: abs: # abs account name, required if you set resource.storage.type=ABS account.name: - # abs account key, required if you set resource.storage.type=ABS + # abs account key, required if you set resource.storage.type=ABS account.key: - # abs container name, required if you set resource.storage.type=ABS + # abs container name, required if you set resource.storage.type=ABS container.name: diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java index c915197844..8abfb38269 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java +++ b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java @@ -17,12 +17,15 @@ package org.apache.dolphinscheduler.common.utils; +import static com.google.common.truth.Truth.assertThat; + import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.commons.lang3.StringUtils; import java.util.Arrays; import java.util.Collections; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -48,4 +51,14 @@ public class PropertyUtilsTest { }, Sets.newHashSet("docker0")); Assertions.assertEquals(Sets.newHashSet("docker0"), networkInterface); } + + @Test + void getByPrefix() { + Map awsProperties = PropertyUtils.getByPrefix("resource.aws.", ""); + assertThat(awsProperties).containsEntry("access.key.id", "minioadmin"); + assertThat(awsProperties).containsEntry("secret.access.key", "minioadmin"); + assertThat(awsProperties).containsEntry("region", "cn-north-1"); + assertThat(awsProperties).containsEntry("s3.bucket.name", "dolphinscheduler"); + assertThat(awsProperties).containsEntry("endpoint", "http://localhost:9000"); + } } diff --git a/dolphinscheduler-common/src/test/resources/common.properties b/dolphinscheduler-common/src/test/resources/common.properties index 7f66a32a23..ce8ef3bf4f 100644 --- a/dolphinscheduler-common/src/test/resources/common.properties +++ b/dolphinscheduler-common/src/test/resources/common.properties @@ -45,6 +45,10 @@ resource.azure.tenant.id=minioadmin # The query interval resource.query.interval=10000 +# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider +# AWSStaticCredentialsProvider: use the access key and secret key to authenticate +# InstanceProfileCredentialsProvider: use the IAM role to authenticate +aws.credentials.provider.type=AWSStaticCredentialsProvider # The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required resource.aws.access.key.id=minioadmin # The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required @@ -54,7 +58,7 @@ resource.aws.region=cn-north-1 # The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. resource.aws.s3.bucket.name=dolphinscheduler # You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://localhost:9000 +resource.aws.endpoint=http://localhost:9000 # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml new file mode 100644 index 0000000000..6d453bb78a --- /dev/null +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +aws: + s3: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: accessKey123 + access.key.secret: secretKey123 + region: us-east-1 + bucket.name: dolphinscheduler + endpoint: http://s3:9000 + emr: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + sagemaker: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + dms: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + datasync: + # The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider + # AWSStaticCredentialsProvider: use the access key and secret key to authenticate + # InstanceProfileCredentialsProvider: use the IAM role to authenticate + credentials.provider.type: AWSStaticCredentialsProvider + access.key.id: minioadmin + access.key.secret: minioadmin + region: cn-north-1 + endpoint: http://localhost:9000 + diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index 7583b3293a..604befdbf8 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -37,17 +37,6 @@ resource.azure.tenant.id=minioadmin # The query interval resource.query.interval=10000 -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id=accessKey123 -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=secretKey123 -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=us-east-1 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://s3:9000 - # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= # alibaba cloud access key secret, required if you set resource.storage.type=OSS diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml index 9a46ed02ad..ccfe940a6d 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml @@ -34,6 +34,7 @@ services: retries: 120 volumes: - ./common.properties:/opt/dolphinscheduler/conf/common.properties + - ./aws.yaml:/opt/dolphinscheduler/conf/aws.yaml depends_on: s3: condition: service_healthy diff --git a/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml b/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml index d521e53bc2..f069b07079 100644 --- a/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml +++ b/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml @@ -56,6 +56,13 @@ conf + + ${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources + + **/*.yaml + + conf + diff --git a/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml b/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml index db312b7298..480301718e 100644 --- a/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml +++ b/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml @@ -86,6 +86,13 @@ conf + + ${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources + + **/*.yaml + + conf + ${basedir}/../dolphinscheduler-api/src/main/resources diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java index a13611316e..3e4b207b50 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java @@ -17,19 +17,18 @@ package org.apache.dolphinscheduler.plugin.storage.s3; -import static org.apache.dolphinscheduler.common.constants.Constants.AWS_END_POINT; import static org.apache.dolphinscheduler.common.constants.Constants.FOLDER_SEPARATOR; import static org.apache.dolphinscheduler.common.constants.Constants.FORMAT_S_S; import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_FILE; import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_UDF; +import org.apache.dolphinscheduler.authentication.aws.AmazonS3ClientFactory; import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; -import org.apache.dolphinscheduler.plugin.task.api.TaskConstants; import org.apache.dolphinscheduler.spi.enums.ResourceType; import org.apache.commons.lang3.StringUtils; @@ -57,11 +56,7 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import com.amazonaws.AmazonServiceException; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.ListObjectsV2Request; @@ -79,71 +74,27 @@ import com.amazonaws.services.s3.transfer.TransferManagerBuilder; @Data public class S3StorageOperator implements Closeable, StorageOperate { - private String accessKeyId; - - private String accessKeySecret; - - private String region; - private String bucketName; - private String endPoint; - private AmazonS3 s3Client; public S3StorageOperator() { } public void init() { - accessKeyId = readAccessKeyID(); - accessKeySecret = readAccessKeySecret(); - region = readRegion(); bucketName = readBucketName(); - endPoint = readEndPoint(); s3Client = buildS3Client(); checkBucketNameExists(bucketName); } protected AmazonS3 buildS3Client() { - if (!StringUtils.isEmpty(endPoint)) { - return AmazonS3ClientBuilder - .standard() - .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration( - endPoint, region)) - .withCredentials( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret))) - .build(); - } else { - return AmazonS3ClientBuilder - .standard() - .withCredentials( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret))) - .withRegion(region) - .build(); - } - } - - protected String readAccessKeyID() { - return PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID); - } - - protected String readAccessKeySecret() { - return PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY); - } - - protected String readRegion() { - return PropertyUtils.getString(TaskConstants.AWS_REGION); + return AmazonS3ClientFactory.createAmazonS3Client(PropertyUtils.getByPrefix("aws.s3.", "")); } protected String readBucketName() { return PropertyUtils.getString(Constants.AWS_S3_BUCKET_NAME); } - protected String readEndPoint() { - return PropertyUtils.getString(AWS_END_POINT); - } - @Override public void close() throws IOException { s3Client.shutdown(); diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java index 0c3f75d861..c5930d713d 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java @@ -78,13 +78,7 @@ public class S3StorageOperatorTest { public void setUp() throws Exception { s3StorageOperator = Mockito.spy(new S3StorageOperator()); - doReturn(ACCESS_KEY_ID_MOCK).when(s3StorageOperator) - .readAccessKeyID(); - doReturn(ACCESS_KEY_SECRET_MOCK).when(s3StorageOperator) - .readAccessKeySecret(); - doReturn(REGION_MOCK).when(s3StorageOperator).readRegion(); doReturn(BUCKET_NAME_MOCK).when(s3StorageOperator).readBucketName(); - doReturn(END_POINT_MOCK).when(s3StorageOperator).readEndPoint(); Mockito.doReturn(s3Client) .when(s3StorageOperator).buildS3Client(); Mockito.doNothing() @@ -96,9 +90,6 @@ public class S3StorageOperatorTest { @Test public void testInit() { verify(s3StorageOperator, times(1)).buildS3Client(); - Assertions.assertEquals(ACCESS_KEY_ID_MOCK, s3StorageOperator.getAccessKeyId()); - Assertions.assertEquals(ACCESS_KEY_SECRET_MOCK, s3StorageOperator.getAccessKeySecret()); - Assertions.assertEquals(REGION_MOCK, s3StorageOperator.getRegion()); Assertions.assertEquals(BUCKET_NAME_MOCK, s3StorageOperator.getBucketName()); } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties index 402112263f..40e1c5abcb 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties @@ -26,17 +26,6 @@ resource.storage.type=NONE # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended resource.storage.upload.base.path=/dolphinscheduler -# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.access.key.id=minioadmin -# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.secret.access.key=minioadmin -# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required -resource.aws.region=cn-north-1 -# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. -resource.aws.s3.bucket.name=dolphinscheduler -# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn -resource.aws.s3.endpoint=http://localhost:9000 - # alibaba cloud access key id, required if you set resource.storage.type=OSS resource.alibaba.cloud.access.key.id= # alibaba cloud access key secret, required if you set resource.storage.type=OSS diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml b/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml index b3e1edb79f..5ec1ce5766 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml @@ -31,22 +31,18 @@ org.apache.dolphinscheduler dolphinscheduler-spi - provided org.apache.dolphinscheduler dolphinscheduler-task-api - provided - - software.amazon.awssdk - datasync + org.apache.dolphinscheduler + dolphinscheduler-aws-authentication org.apache.dolphinscheduler dolphinscheduler-common - provided diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java index a9f855503a..aff7ba2558 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java @@ -17,13 +17,9 @@ package org.apache.dolphinscheduler.plugin.task.datasync; +import org.apache.dolphinscheduler.authentication.aws.DataSyncClientFactory; import org.apache.dolphinscheduler.common.utils.PropertyUtils; -import org.apache.dolphinscheduler.plugin.task.api.TaskConstants; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.datasync.DataSyncClient; import software.amazon.awssdk.services.datasync.model.CancelTaskExecutionRequest; import software.amazon.awssdk.services.datasync.model.CancelTaskExecutionResponse; @@ -48,6 +44,7 @@ import org.apache.commons.lang3.StringUtils; import java.lang.reflect.InvocationTargetException; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import lombok.Data; @@ -73,16 +70,8 @@ public class DatasyncHook { } protected static DataSyncClient createClient() { - final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID); - final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY); - final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION); - - final AwsBasicCredentials basicAWSCredentials = AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey); - final AwsCredentialsProvider awsCredentialsProvider = StaticCredentialsProvider.create(basicAWSCredentials); - - // create a datasync client - return DataSyncClient.builder().region(Region.of(awsRegion)).credentialsProvider(awsCredentialsProvider) - .build(); + Map awsProperties = PropertyUtils.getByPrefix("aws.datasync.", ""); + return DataSyncClientFactory.createDataSyncClient(awsProperties); } public Boolean createDatasyncTask(DatasyncParameters parameters) { diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml b/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml index 8a3bb85413..4a2be06b47 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml @@ -25,27 +25,23 @@ dolphinscheduler-task-dms - jar org.apache.dolphinscheduler dolphinscheduler-spi - provided org.apache.dolphinscheduler dolphinscheduler-task-api - provided org.apache.dolphinscheduler dolphinscheduler-common - provided - com.amazonaws - aws-java-sdk-dms + org.apache.dolphinscheduler + dolphinscheduler-aws-authentication diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java index 40aa6a527d..cf2306abb1 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java @@ -17,9 +17,9 @@ package org.apache.dolphinscheduler.plugin.task.dms; +import org.apache.dolphinscheduler.authentication.aws.AWSDatabaseMigrationServiceClientFactory; import org.apache.dolphinscheduler.common.thread.ThreadUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; -import org.apache.dolphinscheduler.plugin.task.api.TaskConstants; import org.apache.commons.io.IOUtils; @@ -29,6 +29,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Data; @@ -37,11 +38,7 @@ import lombok.NoArgsConstructor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationService; -import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationServiceClientBuilder; import com.amazonaws.services.databasemigrationservice.model.CreateReplicationTaskRequest; import com.amazonaws.services.databasemigrationservice.model.CreateReplicationTaskResult; import com.amazonaws.services.databasemigrationservice.model.DeleteReplicationTaskRequest; @@ -87,17 +84,8 @@ public class DmsHook { } public static AWSDatabaseMigrationService createClient() { - final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID); - final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY); - final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION); - final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey); - final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials); - - // create a DMS client - return AWSDatabaseMigrationServiceClientBuilder.standard() - .withCredentials(awsCredentialsProvider) - .withRegion(awsRegion) - .build(); + Map awsProperties = PropertyUtils.getByPrefix("aws.dms.", ""); + return AWSDatabaseMigrationServiceClientFactory.createAWSDatabaseMigrationServiceClient(awsProperties); } public Boolean createReplicationTask() throws Exception { diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml b/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml index 969756a991..fa7e51062e 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml @@ -28,19 +28,20 @@ jar + org.apache.dolphinscheduler dolphinscheduler-spi - provided + org.apache.dolphinscheduler dolphinscheduler-task-api - provided + - com.amazonaws - aws-java-sdk-emr + org.apache.dolphinscheduler + dolphinscheduler-aws-authentication diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java index 6f6ec63a29..412b0b86e8 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java @@ -22,22 +22,19 @@ import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKN import static com.fasterxml.jackson.databind.DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL; import static com.fasterxml.jackson.databind.MapperFeature.REQUIRE_SETTERS_FOR_GETTERS; +import org.apache.dolphinscheduler.authentication.aws.AmazonElasticMapReduceClientFactory; import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.AbstractRemoteTask; -import org.apache.dolphinscheduler.plugin.task.api.TaskConstants; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; +import java.util.Map; import java.util.TimeZone; import lombok.extern.slf4j.Slf4j; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce; -import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; @@ -94,22 +91,8 @@ public abstract class AbstractEmrTask extends AbstractRemoteTask { return emrParameters; } - /** - * create emr client from BasicAWSCredentials - * - * @return AmazonElasticMapReduce - */ protected AmazonElasticMapReduce createEmrClient() { - - final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID); - final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY); - final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION); - final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey); - final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials); - // create an EMR client - return AmazonElasticMapReduceClientBuilder.standard() - .withCredentials(awsCredentialsProvider) - .withRegion(awsRegion) - .build(); + Map awsProperties = PropertyUtils.getByPrefix("aws.emr.", ""); + return AmazonElasticMapReduceClientFactory.createAmazonElasticMapReduceClient(awsProperties); } } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml b/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml index 7caaf28641..030ab3df58 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml @@ -31,26 +31,23 @@ org.apache.dolphinscheduler dolphinscheduler-spi - provided org.apache.dolphinscheduler dolphinscheduler-task-api - provided org.apache.dolphinscheduler dolphinscheduler-common - provided - com.amazonaws - aws-java-sdk-sagemaker + org.apache.dolphinscheduler + dolphinscheduler-datasource-all + org.apache.dolphinscheduler - dolphinscheduler-datasource-all - ${project.version} + dolphinscheduler-aws-authentication diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java index b1b2cc811f..595655c891 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java @@ -22,7 +22,9 @@ import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKN import static com.fasterxml.jackson.databind.DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL; import static com.fasterxml.jackson.databind.MapperFeature.REQUIRE_SETTERS_FOR_GETTERS; +import org.apache.dolphinscheduler.authentication.aws.AmazonSageMakerClientFactory; import org.apache.dolphinscheduler.common.utils.JSONUtils; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.datasource.api.utils.DataSourceUtils; import org.apache.dolphinscheduler.plugin.datasource.sagemaker.param.SagemakerConnectionParam; import org.apache.dolphinscheduler.plugin.task.api.AbstractRemoteTask; @@ -41,11 +43,7 @@ import java.util.Map; import lombok.extern.slf4j.Slf4j; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.sagemaker.AmazonSageMaker; -import com.amazonaws.services.sagemaker.AmazonSageMakerClientBuilder; import com.amazonaws.services.sagemaker.model.StartPipelineExecutionRequest; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; @@ -186,16 +184,8 @@ public class SagemakerTask extends AbstractRemoteTask { } protected AmazonSageMaker createClient() { - final String awsAccessKeyId = parameters.getUsername(); - final String awsSecretAccessKey = parameters.getPassword(); - final String awsRegion = parameters.getAwsRegion(); - final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey); - final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials); - // create a SageMaker client - return AmazonSageMakerClientBuilder.standard() - .withCredentials(awsCredentialsProvider) - .withRegion(awsRegion) - .build(); + Map awsProperties = PropertyUtils.getByPrefix("aws.sagemaker.", ""); + return AmazonSageMakerClientFactory.createAmazonSageMakerClient(awsProperties); } } diff --git a/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml b/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml index 5ac9b6350d..3507604bbe 100644 --- a/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml +++ b/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml @@ -57,6 +57,13 @@ conf + + ${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources + + **/*.yaml + + conf + diff --git a/pom.xml b/pom.xml index fc5e994bf9..43c476d317 100755 --- a/pom.xml +++ b/pom.xml @@ -57,6 +57,7 @@ dolphinscheduler-storage-plugin dolphinscheduler-extract dolphinscheduler-dao-plugin + dolphinscheduler-authentication @@ -294,6 +295,12 @@ ${project.version} + + org.apache.dolphinscheduler + dolphinscheduler-aws-authentication + ${project.version} + + org.junit junit-bom