Browse Source

[DSIP-42] Add dolphinscheduler-aws-authentication module (#16043)

upstream-dev
Wenjun Ruan 5 months ago committed by GitHub
parent
commit
f754611509
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 11
      deploy/kubernetes/dolphinscheduler/README.md
  2. 2
      deploy/kubernetes/dolphinscheduler/templates/configmap.yaml
  3. 21
      deploy/kubernetes/dolphinscheduler/values.yaml
  4. 22
      docs/docs/en/architecture/configuration.md
  5. 160
      docs/docs/en/guide/resource/configuration.md
  6. 17
      docs/docs/en/guide/task/dms.md
  7. 17
      docs/docs/en/guide/task/sagemaker.md
  8. 157
      docs/docs/zh/guide/resource/configuration.md
  9. 17
      docs/docs/zh/guide/task/dms.md
  10. 17
      docs/docs/zh/guide/task/sagemaker.md
  11. 7
      dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml
  12. 7
      dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml
  13. 64
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml
  14. 74
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java
  15. 49
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java
  16. 53
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java
  17. 53
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java
  18. 54
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java
  19. 53
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java
  20. 28
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java
  21. 47
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java
  22. 65
      dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml
  23. 46
      dolphinscheduler-authentication/pom.xml
  24. 4
      dolphinscheduler-common/pom.xml
  25. 15
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java
  26. 60
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java
  27. 16
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java
  28. 22
      dolphinscheduler-common/src/main/resources/common.properties
  29. 18
      dolphinscheduler-common/src/main/resources/remote-logging.yaml
  30. 13
      dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java
  31. 6
      dolphinscheduler-common/src/test/resources/common.properties
  32. 65
      dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml
  33. 11
      dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties
  34. 1
      dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml
  35. 7
      dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml
  36. 7
      dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml
  37. 53
      dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java
  38. 9
      dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java
  39. 11
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties
  40. 8
      dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml
  41. 19
      dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java
  42. 8
      dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml
  43. 20
      dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java
  44. 9
      dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml
  45. 25
      dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java
  46. 11
      dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml
  47. 18
      dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java
  48. 7
      dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml
  49. 7
      pom.xml

11
deploy/kubernetes/dolphinscheduler/README.md

@ -120,6 +120,12 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst
| conf.auto | bool | `false` | auto restart, if true, all components will be restarted automatically after the common configuration is updated. if false, you need to restart the components manually. default is false |
| conf.common."alert.rpc.port" | int | `50052` | rpc port |
| conf.common."appId.collect" | string | `"log"` | way to collect applicationId: log, aop |
| conf.common."aws.credentials.provider.type" | string | `"AWSStaticCredentialsProvider"` | |
| conf.common."aws.s3.access.key.id" | string | `"minioadmin"` | The AWS access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required |
| conf.common."aws.s3.access.key.secret" | string | `"minioadmin"` | The AWS secret access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required |
| conf.common."aws.s3.bucket.name" | string | `"dolphinscheduler"` | The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. |
| conf.common."aws.s3.endpoint" | string | `"http://minio:9000"` | You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn |
| conf.common."aws.s3.region" | string | `"ca-central-1"` | The AWS Region to use. if resource.storage.type=S3, This configuration is required |
| conf.common."conda.path" | string | `"/opt/anaconda3/etc/profile.d/conda.sh"` | set path of conda.sh |
| conf.common."data-quality.jar.dir" | string | `nil` | data quality option |
| conf.common."data.basedir.path" | string | `"/tmp/dolphinscheduler"` | user data local directory path, please make sure the directory exists and have read write permissions |
@ -138,11 +144,6 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst
| conf.common."resource.alibaba.cloud.oss.bucket.name" | string | `"dolphinscheduler"` | oss bucket name, required if you set resource.storage.type=OSS |
| conf.common."resource.alibaba.cloud.oss.endpoint" | string | `"https://oss-cn-hangzhou.aliyuncs.com"` | oss bucket endpoint, required if you set resource.storage.type=OSS |
| conf.common."resource.alibaba.cloud.region" | string | `"cn-hangzhou"` | alibaba cloud region, required if you set resource.storage.type=OSS |
| conf.common."resource.aws.access.key.id" | string | `"minioadmin"` | The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required |
| conf.common."resource.aws.region" | string | `"ca-central-1"` | The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required |
| conf.common."resource.aws.s3.bucket.name" | string | `"dolphinscheduler"` | The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name. |
| conf.common."resource.aws.s3.endpoint" | string | `"http://minio:9000"` | You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn |
| conf.common."resource.aws.secret.access.key" | string | `"minioadmin"` | The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required |
| conf.common."resource.azure.client.id" | string | `"minioadmin"` | azure storage account name, required if you set resource.storage.type=ABS |
| conf.common."resource.azure.client.secret" | string | `"minioadmin"` | azure storage account key, required if you set resource.storage.type=ABS |
| conf.common."resource.azure.subId" | string | `"minioadmin"` | azure storage subId, required if you set resource.storage.type=ABS |

2
deploy/kubernetes/dolphinscheduler/templates/configmap.yaml

@ -37,7 +37,7 @@ data:
{{- range $key, $value := index .Values.conf "common" }}
{{- if and $.Values.minio.enabled }}
{{- if eq $key "resource.storage.type" }}{{ $value = "S3" }}{{- end }}
{{- if eq $key "resource.aws.s3.endpoint" }}{{ $value = print "http://" (include "dolphinscheduler.minio.fullname" $) ":9000" }}{{- end }}
{{- if eq $key "aws.s3.endpoint" }}{{ $value = print "http://" (include "dolphinscheduler.minio.fullname" $) ":9000" }}{{- end }}
{{- end }}
{{ $key }}={{ $value }}
{{- end }}

21
deploy/kubernetes/dolphinscheduler/values.yaml

@ -250,20 +250,25 @@ conf:
# -- resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
resource.storage.upload.base.path: /dolphinscheduler
# -- The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id: minioadmin
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
aws.credentials.provider.type: AWSStaticCredentialsProvider
# -- The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key: minioadmin
# -- The AWS access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required
aws.s3.access.key.id: minioadmin
# -- The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region: ca-central-1
# -- The AWS secret access key. if resource.storage.type=S3, and credentials.provider.type is AWSStaticCredentialsProvider. This configuration is required
aws.s3.access.key.secret: minioadmin
# -- The AWS Region to use. if resource.storage.type=S3, This configuration is required
aws.s3.region: ca-central-1
# -- The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name: dolphinscheduler
aws.s3.bucket.name: dolphinscheduler
# -- You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint: http://minio:9000
aws.s3.endpoint: http://minio:9000
# -- alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id: <your-access-key-id>

22
docs/docs/en/architecture/configuration.md

@ -191,19 +191,21 @@ The default configuration is as follows:
Note that DolphinScheduler also supports zookeeper related configuration through `bin/env/dolphinscheduler_env.sh`.
For ETCD Registry, please see more details on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-etcd/README.md).
For JDBC Registry, please see more details on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-jdbc/README.md).
For ETCD Registry, please see more details
on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-etcd/README.md).
For JDBC Registry, please see more details
on [link](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-registry/dolphinscheduler-registry-plugins/dolphinscheduler-registry-jdbc/README.md).
### common.properties [hadoop、s3、yarn config properties]
Currently, common.properties mainly configures Hadoop,s3a related configurations. Configuration file location:
| Service | Configuration file |
|---------------|----------------------------------------|
| Master Server | `master-server/conf/common.properties` |
| Api Server | `api-server/conf/common.properties` |
| Worker Server | `worker-server/conf/common.properties` |
| Alert Server | `alert-server/conf/common.properties` |
| Service | Configuration file |
|---------------|-----------------------------------------------------------------------|
| Master Server | `master-server/conf/common.properties` |
| Api Server | `api-server/conf/common.properties`, `api-server/conf/aws.yaml` |
| Worker Server | `worker-server/conf/common.properties`, `worker-server/conf/aws.yaml` |
| Alert Server | `alert-server/conf/common.properties` |
The default configuration is as follows:
@ -212,10 +214,6 @@ The default configuration is as follows:
| data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files |
| resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE |
| resource.upload.path | /dolphinscheduler | storage path of resource files |
| aws.access.key.id | minioadmin | access key id of S3 |
| aws.secret.access.key | minioadmin | secret access key of S3 |
| aws.region | us-east-1 | region of S3 |
| aws.s3.endpoint | http://minio:9000 | endpoint of S3 |
| hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS |
| fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory |
| hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission |

160
docs/docs/en/guide/resource/configuration.md

@ -28,74 +28,37 @@ The configuration you may need to change:
## connect AWS S3
if you want to upload resources to `Resource Center` connected to `S3`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following:
if you want to upload resources to `Resource Center` connected to `S3`, you need to configure `api-server/conf/common.properties`, `api-server/conf/aws.yaml` and `worker-server/conf/common.properties`, `worker-server/conf/aws.yaml`. You can refer to the following:
config the following fields
```properties
......
resource.storage.type=S3
```
......
resource.aws.access.key.id=aws_access_key_id
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=aws_secret_access_key
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=us-west-2
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s4. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=
```yaml
aws:
s3:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
bucket.name: <bucket.name>
endpoint: <endpoint>
......
```
## Use HDFS or Remote Object Storage
## connect OSS S3
After version 3.0.0-alpha, if you want to upload resources to `Resource Center` connected to `HDFS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`.
if you want to upload resources to `Resource Center` connected to `OSS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following:
```properties
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# user data local directory path, please make sure the directory exists and have read write permissions
data.basedir.path=/tmp/dolphinscheduler
# resource view suffixs
#resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS, OBS
resource.storage.type=LOCAL
# resource store on HDFS/S3/OSS path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
resource.storage.upload.base.path=/tmp/dolphinscheduler
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=minioadmin
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=minioadmin
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=cn-north-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://localhost:9000
config the following fields
```properties
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OSS
@ -107,89 +70,24 @@ resource.alibaba.cloud.oss.bucket.name=dolphinscheduler
# oss bucket endpoint, required if you set resource.storage.type=OSS
resource.alibaba.cloud.oss.endpoint=https://oss-cn-hangzhou.aliyuncs.com
# alibaba cloud access key id, required if you set resource.storage.type=OBS
```
## connect OBS S3
if you want to upload resources to `Resource Center` connected to `OBS`, you need to configure `api-server/conf/common.properties` and `worker-server/conf/common.properties`. You can refer to the following:
config the following fields
```properties
# access key id, required if you set resource.storage.type=OBS
resource.huawei.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OBS
# access key secret, required if you set resource.storage.type=OBS
resource.huawei.cloud.access.key.secret=<your-access-key-secret>
# oss bucket name, required if you set resource.storage.type=OBS
resource.huawei.cloud.obs.bucket.name=dolphinscheduler
# oss bucket endpoint, required if you set resource.storage.type=OBS
resource.huawei.cloud.obs.endpoint=obs.cn-southwest-2.huaweicloud.com
# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path
resource.hdfs.root.user=hdfs
# if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir
resource.hdfs.fs.defaultFS=hdfs://mycluster:8020
# whether to startup kerberos
hadoop.security.authentication.startup.state=false
# java.security.krb5.conf path
java.security.krb5.conf.path=/opt/krb5.conf
# login user from keytab username
login.user.keytab.username=hdfs-mycluster@ESZ.COM
# login user from keytab path
login.user.keytab.path=/opt/hdfs.headless.keytab
# kerberos expire time, the unit is hour
kerberos.expire.time=2
# resourcemanager port, the default value is 8088 if not specified
resource.manager.httpaddress.port=8088
# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty
yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx
# if resourcemanager HA is enabled or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname
yarn.application.status.address=http://ds1:%s/ws/v1/cluster/apps/%s
# job history status url when application number threshold is reached(default 10000, maybe it was set to 1000)
yarn.job.history.status.address=http://ds1:19888/ws/v1/history/mapreduce/jobs/%s
# datasource encryption enable
datasource.encryption.enable=false
# datasource encryption salt
datasource.encryption.salt=!@#$%^&*
# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in
# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server
# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`).
data-quality.jar.dir=
#data-quality.error.output.path=/tmp/data-quality-error-data
# Network IP gets priority, default inner outer
# Whether hive SQL is executed in the same session
support.hive.oneSession=false
# use sudo or not, if set true, executing user is tenant user and deploy user needs sudo permissions; if set false, executing user is the deploy user and doesn't need sudo permissions
sudo.enable=true
# network interface preferred like eth0, default: empty
#dolphin.scheduler.network.interface.preferred=
# network IP gets priority, default: inner outer
#dolphin.scheduler.network.priority.strategy=default
# system env path
#dolphinscheduler.env.path=dolphinscheduler_env.sh
# development state
development.state=false
# rpc port
alert.rpc.port=50052
# set path of conda.sh
conda.path=/opt/anaconda3/etc/profile.d/conda.sh
# Task resource limit state
task.resource.limit.state=false
# way to collect applicationId: log(original regex match), aop
appId.collect: log
```
> **Note:**

17
docs/docs/en/guide/task/dms.md

@ -73,14 +73,17 @@ Parameters of restarting the task by interface
## Environment to prepare
Some AWS configuration is required, modify a field in file `common.properties`
Some AWS configuration is required, modify a field in file `aws.yaml`
```yaml
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=<YOUR AWS ACCESS KEY>
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=<YOUR AWS SECRET KEY>
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=<AWS REGION>
dms:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
endpoint: <endpoint>
```

17
docs/docs/en/guide/task/sagemaker.md

@ -35,14 +35,17 @@ The task plugin are shown as follows:
## Environment to prepare
Some AWS configuration is required, modify a field in file `common.properties`
Some AWS configuration is required, modify a field in file `aws.yaml`
```yaml
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=<YOUR AWS ACCESS KEY>
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=<YOUR AWS SECRET KEY>
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=<AWS REGION>
sagemaker:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
endpoint: <endpoint>
```

157
docs/docs/zh/guide/resource/configuration.md

@ -26,77 +26,35 @@ Dolphinscheduler 资源中心使用本地系统默认是开启的,不需要用
## 对接AWS S3
如果需要使用到资源中心的 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下:
如果需要使用到资源中心的 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties`, `api-server/conf/aws.yaml``worker-server/conf/common.properties`, `worker-server/conf/aws.yaml`。可参考如下:
配置以下字段
```properties
......
resource.storage.type=S3
......
resource.aws.access.key.id=aws_access_key_id
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=aws_secret_access_key
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=us-west-2
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s4. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=
......
```
## 对接分布式或远端对象存储
```yaml
aws:
s3:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
bucket.name: <bucket.name>
endpoint: <endpoint>
当需要使用资源中心进行相关文件的创建或者上传操作时,所有的文件和资源都会被存储在分布式文件系统`HDFS`或者远端的对象存储,如`S3`上。所以需要进行以下配置:
```
### 配置 common.properties 文件
## 对接阿里云 OSS
在 3.0.0-alpha 版本之后,如果需要使用到资源中心的 HDFS 或 S3 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下:
如果需要使用到资源中心的 OSS 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下:
```properties
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# user data local directory path, please make sure the directory exists and have read write permissions
data.basedir.path=/tmp/dolphinscheduler
# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS, OBS
resource.storage.type=LOCAL
# resource store on HDFS/S3/OSS path, resource file will store to this hadoop hdfs path, self configuration,
# please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
resource.storage.upload.base.path=/tmp/dolphinscheduler
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=minioadmin
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=minioadmin
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=cn-north-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://localhost:9000
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OSS
@ -108,87 +66,22 @@ resource.alibaba.cloud.oss.bucket.name=dolphinscheduler
# oss bucket endpoint, required if you set resource.storage.type=OSS
resource.alibaba.cloud.oss.endpoint=https://oss-cn-hangzhou.aliyuncs.com
# alibaba cloud access key id, required if you set resource.storage.type=OBS
```
## 对接华为云 OBS
如果需要使用到资源中心的 OBS 上传资源,我们需要对以下路径的进行配置:`api-server/conf/common.properties` 和 `worker-server/conf/common.properties`。可参考如下:
```properties
# access key id, required if you set resource.storage.type=OBS
resource.huawei.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OBS
# access key secret, required if you set resource.storage.type=OBS
resource.huawei.cloud.access.key.secret=<your-access-key-secret>
# oss bucket name, required if you set resource.storage.type=OBS
resource.huawei.cloud.obs.bucket.name=dolphinscheduler
# oss bucket endpoint, required if you set resource.storage.type=OBS
resource.huawei.cloud.obs.endpoint=obs.cn-southwest-2.huaweicloud.com
# if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path
resource.hdfs.root.user=root
# if resource.storage.type=S3, the value like: s3a://dolphinscheduler;
# if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir
resource.hdfs.fs.defaultFS=hdfs://localhost:8020
# whether to startup kerberos
hadoop.security.authentication.startup.state=false
# java.security.krb5.conf path
java.security.krb5.conf.path=/opt/krb5.conf
# login user from keytab username
login.user.keytab.username=hdfs-mycluster@ESZ.COM
# login user from keytab path
login.user.keytab.path=/opt/hdfs.headless.keytab
# kerberos expire time, the unit is hour
kerberos.expire.time=2
# resource view suffixs
#resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
# resourcemanager port, the default value is 8088 if not specified
resource.manager.httpaddress.port=8088
# if resourcemanager HA is enabled, please set the HA IPs; if resourcemanager is single, keep this value empty
yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx
# if resourcemanager HA is enabled or not use resourcemanager, please keep the default value;
# If resourcemanager is single, you only need to replace ds1 to actual resourcemanager hostname
yarn.application.status.address=http://localhost:%s/ds/v1/cluster/apps/%s
# job history status url when application number threshold is reached(default 10000, maybe it was set to 1000)
yarn.job.history.status.address=http://localhost:19888/ds/v1/history/mapreduce/jobs/%s
# datasource encryption enable
datasource.encryption.enable=false
# datasource encryption salt
datasource.encryption.salt=!@#$%^&*
# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in
# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server
# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`).
data-quality.jar.dir=
#data-quality.error.output.path=/tmp/data-quality-error-data
# Network IP gets priority, default inner outer
# Whether hive SQL is executed in the same session
support.hive.oneSession=false
# use sudo or not, if set true, executing user is tenant user and deploy user needs sudo permissions;
# if set false, executing user is the deploy user and doesn't need sudo permissions
sudo.enable=true
# network interface preferred like eth0, default: empty
#dolphin.scheduler.network.interface.preferred=
# network IP gets priority, default: inner outer
#dolphin.scheduler.network.priority.strategy=default
# system env path
#dolphinscheduler.env.path=env/dolphinscheduler_env.sh
# development state
development.state=false
# rpc port
alert.rpc.port=50052
# way to collect applicationId: log(original regex match), aop
appId.collect: log
```
> **注意**

17
docs/docs/zh/guide/task/dms.md

@ -73,14 +73,17 @@ DolphinScheduler 在 启动DMS 任务后,会跟中DMS任务状态,直至DMS
## 环境配置
需要进行AWS的一些配置,修改`common.properties`中的以下配置信息
需要进行AWS的一些配置,修改`aws.yml`中的以下配置信息
```yaml
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=<YOUR AWS ACCESS KEY>
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=<YOUR AWS SECRET KEY>
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=<AWS REGION>
dms:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
endpoint: <endpoint>
```

17
docs/docs/zh/guide/task/sagemaker.md

@ -33,14 +33,17 @@ DolphinScheduler SageMaker 组件的功能:
## 环境配置
需要进行AWS的一些配置,修改`common.properties`中的`xxxxx`为你的配置信息
需要进行AWS的一些配置,修改`aws.yml`中的以下配置信息
```yaml
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=<YOUR AWS ACCESS KEY>
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=<YOUR AWS SECRET KEY>
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=<AWS REGION>
sagemaker:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: <access.key.id>
access.key.secret: <access.key.secret>
region: <region>
endpoint: <endpoint>
```

7
dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml

@ -56,6 +56,13 @@
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

7
dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml

@ -57,6 +57,13 @@
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-ui/dist</directory>
<outputDirectory>./ui</outputDirectory>

64
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/pom.xml

@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-authentication</artifactId>
<version>dev-SNAPSHOT</version>
</parent>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
<dependencies>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-emr</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-sagemaker</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-dms</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>datasync</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
</project>

74
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderFactor.java

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import static org.apache.dolphinscheduler.authentication.aws.AwsConfigurationKeys.AWS_AUTHENTICATION_TYPE;
import java.util.Map;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
@Slf4j
@UtilityClass
public class AWSCredentialsProviderFactor {
public static AWSCredentialsProvider credentialsProvider(Map<String, String> awsProperties) {
String awsAuthenticationType = awsProperties.getOrDefault(
AWS_AUTHENTICATION_TYPE, AWSCredentialsProviderType.STATIC_CREDENTIALS_PROVIDER.getName());
AWSCredentialsProviderType awsCredentialsProviderType =
AWSCredentialsProviderType.of(awsAuthenticationType).orElse(null);
if (awsCredentialsProviderType == null) {
throw new IllegalArgumentException(
"The aws.credentials.provider.type: " + awsAuthenticationType + " is invalidated");
}
switch (awsCredentialsProviderType) {
case STATIC_CREDENTIALS_PROVIDER:
return createAWSStaticCredentialsProvider(awsProperties);
case INSTANCE_PROFILE_CREDENTIALS_PROVIDER:
return createInstanceProfileCredentialsProvider();
default:
throw new IllegalArgumentException(
"The aws.credentials.provider.type: " + awsAuthenticationType + " is invalidated");
}
}
private static AWSCredentialsProvider createAWSStaticCredentialsProvider(Map<String, String> awsProperties) {
String awsAccessKeyId = awsProperties.get(AwsConfigurationKeys.AWS_ACCESS_KEY_ID);
String awsSecretAccessKey = awsProperties.get(AwsConfigurationKeys.AWS_SECRET);
final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey);
AWSStaticCredentialsProvider awsStaticCredentialsProvider =
new AWSStaticCredentialsProvider(basicAWSCredentials);
log.info("AWSStaticCredentialsProvider created successfully");
return awsStaticCredentialsProvider;
}
private static AWSCredentialsProvider createInstanceProfileCredentialsProvider() {
InstanceProfileCredentialsProvider instanceProfileCredentialsProvider =
InstanceProfileCredentialsProvider.getInstance();
log.info("InstanceProfileCredentialsProvider created successfully");
return instanceProfileCredentialsProvider;
}
}

49
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSCredentialsProviderType.java

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import java.util.Optional;
import lombok.Getter;
@Getter
public enum AWSCredentialsProviderType {
STATIC_CREDENTIALS_PROVIDER("AWSStaticCredentialsProvider"),
INSTANCE_PROFILE_CREDENTIALS_PROVIDER("InstanceProfileCredentialsProvider"),
;
private final String name;
AWSCredentialsProviderType(String name) {
this.name = name;
}
public static Optional<AWSCredentialsProviderType> of(String name) {
if (name == null) {
return Optional.empty();
}
for (AWSCredentialsProviderType type : values()) {
if (type.getName().equalsIgnoreCase(name)) {
return Optional.of(type);
}
}
return Optional.empty();
}
}

53
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AWSDatabaseMigrationServiceClientFactory.java

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import java.util.Map;
import lombok.experimental.UtilityClass;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationService;
import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationServiceClientBuilder;
@UtilityClass
public class AWSDatabaseMigrationServiceClientFactory {
public AWSDatabaseMigrationService createAWSDatabaseMigrationServiceClient(Map<String, String> awsProperties) {
AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties);
Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION));
String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT);
if (endpoint != null && !endpoint.isEmpty()) {
return AWSDatabaseMigrationServiceClientBuilder
.standard()
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName()))
.withCredentials(awsCredentialsProvider)
.build();
} else {
return AWSDatabaseMigrationServiceClientBuilder
.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(regions)
.build();
}
}
}

53
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonElasticMapReduceClientFactory.java

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import java.util.Map;
import lombok.experimental.UtilityClass;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
@UtilityClass
public class AmazonElasticMapReduceClientFactory {
public AmazonElasticMapReduce createAmazonElasticMapReduceClient(Map<String, String> awsProperties) {
AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties);
Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION));
String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT);
if (endpoint != null && !endpoint.isEmpty()) {
return AmazonElasticMapReduceClientBuilder
.standard()
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName()))
.withCredentials(awsCredentialsProvider)
.build();
} else {
return AmazonElasticMapReduceClientBuilder
.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(regions)
.build();
}
}
}

54
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonS3ClientFactory.java

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import java.util.Map;
import lombok.experimental.UtilityClass;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@UtilityClass
public class AmazonS3ClientFactory {
public AmazonS3 createAmazonS3Client(Map<String, String> awsProperties) {
AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties);
Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION));
String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT);
if (endpoint != null && !endpoint.isEmpty()) {
return AmazonS3ClientBuilder
.standard()
.withPathStyleAccessEnabled(true)
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName()))
.withCredentials(awsCredentialsProvider)
.build();
} else {
return AmazonS3ClientBuilder
.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(regions)
.build();
}
}
}

53
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AmazonSageMakerClientFactory.java

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import java.util.Map;
import lombok.experimental.UtilityClass;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.sagemaker.AmazonSageMaker;
import com.amazonaws.services.sagemaker.AmazonSageMakerClientBuilder;
@UtilityClass
public class AmazonSageMakerClientFactory {
public AmazonSageMaker createAmazonSageMakerClient(Map<String, String> awsProperties) {
AWSCredentialsProvider awsCredentialsProvider = AWSCredentialsProviderFactor.credentialsProvider(awsProperties);
Regions regions = Regions.fromName(awsProperties.get(AwsConfigurationKeys.AWS_REGION));
String endpoint = awsProperties.get(AwsConfigurationKeys.AWS_ENDPOINT);
if (endpoint != null && !endpoint.isEmpty()) {
return AmazonSageMakerClientBuilder
.standard()
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, regions.getName()))
.withCredentials(awsCredentialsProvider)
.build();
} else {
return AmazonSageMakerClientBuilder
.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(regions)
.build();
}
}
}

28
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/AwsConfigurationKeys.java

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
public class AwsConfigurationKeys {
public static final String AWS_AUTHENTICATION_TYPE = "credentials.provider.type";
public static final String AWS_REGION = "region";
public static final String AWS_ENDPOINT = "endpoint";
public static final String AWS_ACCESS_KEY_ID = "access.key.id";
public static final String AWS_SECRET = "access.key.secret";
}

47
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/java/org/apache/dolphinscheduler/authentication/aws/DataSyncClientFactory.java

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.authentication.aws;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.datasync.DataSyncClient;
import java.util.Map;
import lombok.experimental.UtilityClass;
@UtilityClass
public class DataSyncClientFactory {
public DataSyncClient createDataSyncClient(Map<String, String> awsProperties) {
// todo: upgrade the version of aws sdk
String awsAccessKeyId = awsProperties.get(AwsConfigurationKeys.AWS_ACCESS_KEY_ID);
String awsSecretAccessKey = awsProperties.get(AwsConfigurationKeys.AWS_SECRET);
final AwsBasicCredentials basicAWSCredentials = AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey);
final AwsCredentialsProvider awsCredentialsProvider = StaticCredentialsProvider.create(basicAWSCredentials);
// create a datasync client
return DataSyncClient.builder()
.region(Region.of(awsProperties.get(AwsConfigurationKeys.AWS_REGION)))
.credentialsProvider(awsCredentialsProvider)
.build();
}
}

65
dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources/aws.yaml

@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
aws:
s3:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: accessKey123
access.key.secret: secretKey123
region: us-east-1
bucket.name: dolphinscheduler
endpoint: http://s3:9000
emr:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
sagemaker:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
dms:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
datasync:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000

46
dolphinscheduler-authentication/pom.xml

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler</artifactId>
<version>dev-SNAPSHOT</version>
</parent>
<artifactId>dolphinscheduler-authentication</artifactId>
<packaging>pom</packaging>
<modules>
<module>dolphinscheduler-aws-authentication</module>
</modules>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
</project>

4
dolphinscheduler-common/pom.xml

@ -41,6 +41,10 @@
</dependencyManagement>
<dependencies>
<!-- dolphinscheduler -->
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>

15
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java

@ -36,6 +36,7 @@ public final class Constants {
public static final String COMMON_PROPERTIES_PATH = "/common.properties";
public static final String REMOTE_LOGGING_YAML_PATH = "/remote-logging.yaml";
public static final String AWS_YAML_PATH = "/aws.yaml";
public static final String FORMAT_SS = "%s%s";
public static final String FORMAT_S_S = "%s/%s";
@ -130,8 +131,7 @@ public final class Constants {
*/
public static final String RESOURCE_STORAGE_TYPE = "resource.storage.type";
public static final String AWS_S3_BUCKET_NAME = "resource.aws.s3.bucket.name";
public static final String AWS_END_POINT = "resource.aws.s3.endpoint";
public static final String AWS_S3_BUCKET_NAME = "aws.s3.bucket.name";
public static final String ALIBABA_CLOUD_OSS_BUCKET_NAME = "resource.alibaba.cloud.oss.bucket.name";
public static final String ALIBABA_CLOUD_OSS_END_POINT = "resource.alibaba.cloud.oss.endpoint";
@ -704,19 +704,8 @@ public final class Constants {
public static final String REMOTE_LOGGING_OSS_ENDPOINT = "remote.logging.oss.endpoint";
/**
* remote logging for S3
*/
public static final String REMOTE_LOGGING_S3_ACCESS_KEY_ID = "remote.logging.s3.access.key.id";
public static final String REMOTE_LOGGING_S3_ACCESS_KEY_SECRET = "remote.logging.s3.access.key.secret";
public static final String REMOTE_LOGGING_S3_BUCKET_NAME = "remote.logging.s3.bucket.name";
public static final String REMOTE_LOGGING_S3_ENDPOINT = "remote.logging.s3.endpoint";
public static final String REMOTE_LOGGING_S3_REGION = "remote.logging.s3.region";
/**
* remote logging for GCS
*/

60
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/S3RemoteLogHandler.java

@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.common.log.remote;
import org.apache.dolphinscheduler.authentication.aws.AmazonS3ClientFactory;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
@ -26,41 +27,25 @@ import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
@Slf4j
public class S3RemoteLogHandler implements RemoteLogHandler, Closeable {
private String accessKeyId;
private final String bucketName;
private String accessKeySecret;
private String region;
private String bucketName;
private String endPoint;
private AmazonS3 s3Client;
private final AmazonS3 s3Client;
private static S3RemoteLogHandler instance;
private S3RemoteLogHandler() {
accessKeyId = readAccessKeyID();
accessKeySecret = readAccessKeySecret();
region = readRegion();
bucketName = readBucketName();
endPoint = readEndPoint();
s3Client = buildS3Client();
checkBucketNameExists(bucketName);
}
@ -74,23 +59,8 @@ public class S3RemoteLogHandler implements RemoteLogHandler, Closeable {
}
protected AmazonS3 buildS3Client() {
if (StringUtils.isNotEmpty(endPoint)) {
return AmazonS3ClientBuilder
.standard()
.withPathStyleAccessEnabled(true)
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
endPoint, Regions.fromName(region).getName()))
.withCredentials(
new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret)))
.build();
} else {
return AmazonS3ClientBuilder
.standard()
.withCredentials(
new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret)))
.withRegion(Regions.fromName(region))
.build();
}
Map<String, String> awsProperties = PropertyUtils.getByPrefix("aws.s3.", "");
return AmazonS3ClientFactory.createAmazonS3Client(awsProperties);
}
@Override
@ -131,24 +101,8 @@ public class S3RemoteLogHandler implements RemoteLogHandler, Closeable {
}
}
protected String readAccessKeyID() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ACCESS_KEY_ID);
}
protected String readAccessKeySecret() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ACCESS_KEY_SECRET);
}
protected String readRegion() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_REGION);
}
protected String readBucketName() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_BUCKET_NAME);
}
protected String readEndPoint() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_S3_ENDPOINT);
return PropertyUtils.getString(Constants.AWS_S3_BUCKET_NAME);
}
public void checkBucketNameExists(String bucketName) {

16
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java

@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.common.utils;
import static org.apache.dolphinscheduler.common.constants.Constants.AWS_YAML_PATH;
import static org.apache.dolphinscheduler.common.constants.Constants.COMMON_PROPERTIES_PATH;
import static org.apache.dolphinscheduler.common.constants.Constants.REMOTE_LOGGING_YAML_PATH;
@ -42,7 +43,7 @@ public class PropertyUtils {
private final ImmutablePriorityPropertyDelegate propertyDelegate =
new ImmutablePriorityPropertyDelegate(
new ImmutablePropertyDelegate(COMMON_PROPERTIES_PATH),
new ImmutableYamlDelegate(REMOTE_LOGGING_YAML_PATH));
new ImmutableYamlDelegate(REMOTE_LOGGING_YAML_PATH, AWS_YAML_PATH));
public static String getString(String key) {
return propertyDelegate.get(key.trim());
@ -106,6 +107,19 @@ public class PropertyUtils {
return matchedProperties;
}
/**
* Get all properties with specified prefix, like: fs., will replace the prefix with newPrefix
*/
public static Map<String, String> getByPrefix(String prefix, String newPrefix) {
Map<String, String> matchedProperties = new HashMap<>();
for (String propName : propertyDelegate.getPropertyKeys()) {
if (propName.startsWith(prefix)) {
matchedProperties.put(propName.replace(prefix, newPrefix), propertyDelegate.get(propName));
}
}
return matchedProperties;
}
public static <T> Set<T> getSet(String key, Function<String, Set<T>> transformFunction, Set<T> defaultValue) {
return propertyDelegate.get(key, transformFunction, defaultValue);
}

22
dolphinscheduler-common/src/main/resources/common.properties

@ -39,17 +39,6 @@ resource.azure.tenant.id=minioadmin
# The query interval
resource.query.interval=10000
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=minioadmin
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=minioadmin
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=cn-north-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://localhost:9000
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OSS
@ -188,16 +177,7 @@ remote.logging.oss.access.key.secret=<access.key.secret>
remote.logging.oss.bucket.name=<bucket.name>
# oss endpoint, required if you set remote.logging.target=OSS
remote.logging.oss.endpoint=<endpoint>
# s3 access key id, required if you set remote.logging.target=S3
remote.logging.s3.access.key.id=<access.key.id>
# s3 access key secret, required if you set remote.logging.target=S3
remote.logging.s3.access.key.secret=<access.key.secret>
# s3 bucket name, required if you set remote.logging.target=S3
remote.logging.s3.bucket.name=<bucket.name>
# s3 endpoint, required if you set remote.logging.target=S3
remote.logging.s3.endpoint=<endpoint>
# s3 region, required if you set remote.logging.target=S3
remote.logging.s3.region=<region>
# the location of the google cloud credential, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.credential=/path/to/credential
# gcs bucket name, required if you set remote.logging.target=GCS

18
dolphinscheduler-common/src/main/resources/remote-logging.yaml

@ -34,28 +34,16 @@ remote-logging:
bucket.name: <bucket.name>
# oss endpoint, required if you set remote-logging.target=OSS
endpoint: <endpoint>
# required if you set remote-logging.target=S3
s3:
# s3 access key id, required if you set remote-logging.target=S3
access.key.id: <access.key.id>
# s3 access key secret, required if you set remote-logging.target=S3
access.key.secret: <access.key.secret>
# s3 bucket name, required if you set remote-logging.target=S3
bucket.name: <bucket.name>
# s3 endpoint, required if you set remote-logging.target=S3
endpoint: <endpoint>
# s3 region, required if you set remote-logging.target=S3
region: <region>
google.cloud.storage:
# the location of the google cloud credential, required if you set remote-logging.target=GCS
credential: /path/to/credential
# gcs bucket name, required if you set remote-logging.target=GCS
# gcs bucket name, required if you set remote-logging.target=GCS
bucket.name: <your-bucket>
abs:
# abs account name, required if you set resource.storage.type=ABS
account.name: <your-account-name>
# abs account key, required if you set resource.storage.type=ABS
# abs account key, required if you set resource.storage.type=ABS
account.key: <your-account-key>
# abs container name, required if you set resource.storage.type=ABS
# abs container name, required if you set resource.storage.type=ABS
container.name: <your-container-name>

13
dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java

@ -17,12 +17,15 @@
package org.apache.dolphinscheduler.common.utils;
import static com.google.common.truth.Truth.assertThat;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.commons.lang3.StringUtils;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -48,4 +51,14 @@ public class PropertyUtilsTest {
}, Sets.newHashSet("docker0"));
Assertions.assertEquals(Sets.newHashSet("docker0"), networkInterface);
}
@Test
void getByPrefix() {
Map<String, String> awsProperties = PropertyUtils.getByPrefix("resource.aws.", "");
assertThat(awsProperties).containsEntry("access.key.id", "minioadmin");
assertThat(awsProperties).containsEntry("secret.access.key", "minioadmin");
assertThat(awsProperties).containsEntry("region", "cn-north-1");
assertThat(awsProperties).containsEntry("s3.bucket.name", "dolphinscheduler");
assertThat(awsProperties).containsEntry("endpoint", "http://localhost:9000");
}
}

6
dolphinscheduler-common/src/test/resources/common.properties

@ -45,6 +45,10 @@ resource.azure.tenant.id=minioadmin
# The query interval
resource.query.interval=10000
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
aws.credentials.provider.type=AWSStaticCredentialsProvider
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=minioadmin
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
@ -54,7 +58,7 @@ resource.aws.region=cn-north-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://localhost:9000
resource.aws.endpoint=http://localhost:9000
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>

65
dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/aws.yaml

@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
aws:
s3:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: accessKey123
access.key.secret: secretKey123
region: us-east-1
bucket.name: dolphinscheduler
endpoint: http://s3:9000
emr:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
sagemaker:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
dms:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000
datasync:
# The AWS credentials provider type. support: AWSStaticCredentialsProvider, InstanceProfileCredentialsProvider
# AWSStaticCredentialsProvider: use the access key and secret key to authenticate
# InstanceProfileCredentialsProvider: use the IAM role to authenticate
credentials.provider.type: AWSStaticCredentialsProvider
access.key.id: minioadmin
access.key.secret: minioadmin
region: cn-north-1
endpoint: http://localhost:9000

11
dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties

@ -37,17 +37,6 @@ resource.azure.tenant.id=minioadmin
# The query interval
resource.query.interval=10000
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=accessKey123
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=secretKey123
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=us-east-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://s3:9000
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OSS

1
dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml

@ -34,6 +34,7 @@ services:
retries: 120
volumes:
- ./common.properties:/opt/dolphinscheduler/conf/common.properties
- ./aws.yaml:/opt/dolphinscheduler/conf/aws.yaml
depends_on:
s3:
condition: service_healthy

7
dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml

@ -56,6 +56,13 @@
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

7
dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml

@ -86,6 +86,13 @@
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-api/src/main/resources</directory>
<includes>

53
dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java

@ -17,19 +17,18 @@
package org.apache.dolphinscheduler.plugin.storage.s3;
import static org.apache.dolphinscheduler.common.constants.Constants.AWS_END_POINT;
import static org.apache.dolphinscheduler.common.constants.Constants.FOLDER_SEPARATOR;
import static org.apache.dolphinscheduler.common.constants.Constants.FORMAT_S_S;
import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_FILE;
import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_UDF;
import org.apache.dolphinscheduler.authentication.aws.AmazonS3ClientFactory;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.enums.ResUploadType;
import org.apache.dolphinscheduler.common.utils.FileUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity;
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate;
import org.apache.dolphinscheduler.plugin.task.api.TaskConstants;
import org.apache.dolphinscheduler.spi.enums.ResourceType;
import org.apache.commons.lang3.StringUtils;
@ -57,11 +56,7 @@ import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
@ -79,71 +74,27 @@ import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
@Data
public class S3StorageOperator implements Closeable, StorageOperate {
private String accessKeyId;
private String accessKeySecret;
private String region;
private String bucketName;
private String endPoint;
private AmazonS3 s3Client;
public S3StorageOperator() {
}
public void init() {
accessKeyId = readAccessKeyID();
accessKeySecret = readAccessKeySecret();
region = readRegion();
bucketName = readBucketName();
endPoint = readEndPoint();
s3Client = buildS3Client();
checkBucketNameExists(bucketName);
}
protected AmazonS3 buildS3Client() {
if (!StringUtils.isEmpty(endPoint)) {
return AmazonS3ClientBuilder
.standard()
.withPathStyleAccessEnabled(true)
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
endPoint, region))
.withCredentials(
new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret)))
.build();
} else {
return AmazonS3ClientBuilder
.standard()
.withCredentials(
new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, accessKeySecret)))
.withRegion(region)
.build();
}
}
protected String readAccessKeyID() {
return PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID);
}
protected String readAccessKeySecret() {
return PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY);
}
protected String readRegion() {
return PropertyUtils.getString(TaskConstants.AWS_REGION);
return AmazonS3ClientFactory.createAmazonS3Client(PropertyUtils.getByPrefix("aws.s3.", ""));
}
protected String readBucketName() {
return PropertyUtils.getString(Constants.AWS_S3_BUCKET_NAME);
}
protected String readEndPoint() {
return PropertyUtils.getString(AWS_END_POINT);
}
@Override
public void close() throws IOException {
s3Client.shutdown();

9
dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/test/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperatorTest.java

@ -78,13 +78,7 @@ public class S3StorageOperatorTest {
public void setUp() throws Exception {
s3StorageOperator = Mockito.spy(new S3StorageOperator());
doReturn(ACCESS_KEY_ID_MOCK).when(s3StorageOperator)
.readAccessKeyID();
doReturn(ACCESS_KEY_SECRET_MOCK).when(s3StorageOperator)
.readAccessKeySecret();
doReturn(REGION_MOCK).when(s3StorageOperator).readRegion();
doReturn(BUCKET_NAME_MOCK).when(s3StorageOperator).readBucketName();
doReturn(END_POINT_MOCK).when(s3StorageOperator).readEndPoint();
Mockito.doReturn(s3Client)
.when(s3StorageOperator).buildS3Client();
Mockito.doNothing()
@ -96,9 +90,6 @@ public class S3StorageOperatorTest {
@Test
public void testInit() {
verify(s3StorageOperator, times(1)).buildS3Client();
Assertions.assertEquals(ACCESS_KEY_ID_MOCK, s3StorageOperator.getAccessKeyId());
Assertions.assertEquals(ACCESS_KEY_SECRET_MOCK, s3StorageOperator.getAccessKeySecret());
Assertions.assertEquals(REGION_MOCK, s3StorageOperator.getRegion());
Assertions.assertEquals(BUCKET_NAME_MOCK, s3StorageOperator.getBucketName());
}

11
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties

@ -26,17 +26,6 @@ resource.storage.type=NONE
# resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
resource.storage.upload.base.path=/dolphinscheduler
# The AWS access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.access.key.id=minioadmin
# The AWS secret access key. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.secret.access.key=minioadmin
# The AWS Region to use. if resource.storage.type=S3 or use EMR-Task, This configuration is required
resource.aws.region=cn-north-1
# The name of the bucket. You need to create them by yourself. Otherwise, the system cannot start. All buckets in Amazon S3 share a single namespace; ensure the bucket is given a unique name.
resource.aws.s3.bucket.name=dolphinscheduler
# You need to set this parameter when private cloud s3. If S3 uses public cloud, you only need to set resource.aws.region or set to the endpoint of a public cloud such as S3.cn-north-1.amazonaws.com.cn
resource.aws.s3.endpoint=http://localhost:9000
# alibaba cloud access key id, required if you set resource.storage.type=OSS
resource.alibaba.cloud.access.key.id=<your-access-key-id>
# alibaba cloud access key secret, required if you set resource.storage.type=OSS

8
dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/pom.xml

@ -31,22 +31,18 @@
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-spi</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-task-api</artifactId>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/datasync -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>datasync</artifactId>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-common</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

19
dolphinscheduler-task-plugin/dolphinscheduler-task-datasync/src/main/java/org/apache/dolphinscheduler/plugin/task/datasync/DatasyncHook.java

@ -17,13 +17,9 @@
package org.apache.dolphinscheduler.plugin.task.datasync;
import org.apache.dolphinscheduler.authentication.aws.DataSyncClientFactory;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.task.api.TaskConstants;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.datasync.DataSyncClient;
import software.amazon.awssdk.services.datasync.model.CancelTaskExecutionRequest;
import software.amazon.awssdk.services.datasync.model.CancelTaskExecutionResponse;
@ -48,6 +44,7 @@ import org.apache.commons.lang3.StringUtils;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Data;
@ -73,16 +70,8 @@ public class DatasyncHook {
}
protected static DataSyncClient createClient() {
final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID);
final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY);
final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION);
final AwsBasicCredentials basicAWSCredentials = AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey);
final AwsCredentialsProvider awsCredentialsProvider = StaticCredentialsProvider.create(basicAWSCredentials);
// create a datasync client
return DataSyncClient.builder().region(Region.of(awsRegion)).credentialsProvider(awsCredentialsProvider)
.build();
Map<String, String> awsProperties = PropertyUtils.getByPrefix("aws.datasync.", "");
return DataSyncClientFactory.createDataSyncClient(awsProperties);
}
public Boolean createDatasyncTask(DatasyncParameters parameters) {

8
dolphinscheduler-task-plugin/dolphinscheduler-task-dms/pom.xml

@ -25,27 +25,23 @@
</parent>
<artifactId>dolphinscheduler-task-dms</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-spi</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-task-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-dms</artifactId>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
</dependency>
</dependencies>

20
dolphinscheduler-task-plugin/dolphinscheduler-task-dms/src/main/java/org/apache/dolphinscheduler/plugin/task/dms/DmsHook.java

@ -17,9 +17,9 @@
package org.apache.dolphinscheduler.plugin.task.dms;
import org.apache.dolphinscheduler.authentication.aws.AWSDatabaseMigrationServiceClientFactory;
import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.task.api.TaskConstants;
import org.apache.commons.io.IOUtils;
@ -29,6 +29,7 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -37,11 +38,7 @@ import lombok.NoArgsConstructor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationService;
import com.amazonaws.services.databasemigrationservice.AWSDatabaseMigrationServiceClientBuilder;
import com.amazonaws.services.databasemigrationservice.model.CreateReplicationTaskRequest;
import com.amazonaws.services.databasemigrationservice.model.CreateReplicationTaskResult;
import com.amazonaws.services.databasemigrationservice.model.DeleteReplicationTaskRequest;
@ -87,17 +84,8 @@ public class DmsHook {
}
public static AWSDatabaseMigrationService createClient() {
final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID);
final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY);
final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION);
final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey);
final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials);
// create a DMS client
return AWSDatabaseMigrationServiceClientBuilder.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(awsRegion)
.build();
Map<String, String> awsProperties = PropertyUtils.getByPrefix("aws.dms.", "");
return AWSDatabaseMigrationServiceClientFactory.createAWSDatabaseMigrationServiceClient(awsProperties);
}
public Boolean createReplicationTask() throws Exception {

9
dolphinscheduler-task-plugin/dolphinscheduler-task-emr/pom.xml

@ -28,19 +28,20 @@
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-spi</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-task-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-emr</artifactId>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
</dependency>
</dependencies>

25
dolphinscheduler-task-plugin/dolphinscheduler-task-emr/src/main/java/org/apache/dolphinscheduler/plugin/task/emr/AbstractEmrTask.java

@ -22,22 +22,19 @@ import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKN
import static com.fasterxml.jackson.databind.DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL;
import static com.fasterxml.jackson.databind.MapperFeature.REQUIRE_SETTERS_FOR_GETTERS;
import org.apache.dolphinscheduler.authentication.aws.AmazonElasticMapReduceClientFactory;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.task.api.AbstractRemoteTask;
import org.apache.dolphinscheduler.plugin.task.api.TaskConstants;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters;
import java.util.Map;
import java.util.TimeZone;
import lombok.extern.slf4j.Slf4j;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
@ -94,22 +91,8 @@ public abstract class AbstractEmrTask extends AbstractRemoteTask {
return emrParameters;
}
/**
* create emr client from BasicAWSCredentials
*
* @return AmazonElasticMapReduce
*/
protected AmazonElasticMapReduce createEmrClient() {
final String awsAccessKeyId = PropertyUtils.getString(TaskConstants.AWS_ACCESS_KEY_ID);
final String awsSecretAccessKey = PropertyUtils.getString(TaskConstants.AWS_SECRET_ACCESS_KEY);
final String awsRegion = PropertyUtils.getString(TaskConstants.AWS_REGION);
final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey);
final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials);
// create an EMR client
return AmazonElasticMapReduceClientBuilder.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(awsRegion)
.build();
Map<String, String> awsProperties = PropertyUtils.getByPrefix("aws.emr.", "");
return AmazonElasticMapReduceClientFactory.createAmazonElasticMapReduceClient(awsProperties);
}
}

11
dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/pom.xml

@ -31,26 +31,23 @@
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-spi</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-task-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-sagemaker</artifactId>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-datasource-all</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-datasource-all</artifactId>
<version>${project.version}</version>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
</dependency>
</dependencies>

18
dolphinscheduler-task-plugin/dolphinscheduler-task-sagemaker/src/main/java/org/apache/dolphinscheduler/plugin/task/sagemaker/SagemakerTask.java

@ -22,7 +22,9 @@ import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKN
import static com.fasterxml.jackson.databind.DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL;
import static com.fasterxml.jackson.databind.MapperFeature.REQUIRE_SETTERS_FOR_GETTERS;
import org.apache.dolphinscheduler.authentication.aws.AmazonSageMakerClientFactory;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.datasource.api.utils.DataSourceUtils;
import org.apache.dolphinscheduler.plugin.datasource.sagemaker.param.SagemakerConnectionParam;
import org.apache.dolphinscheduler.plugin.task.api.AbstractRemoteTask;
@ -41,11 +43,7 @@ import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.sagemaker.AmazonSageMaker;
import com.amazonaws.services.sagemaker.AmazonSageMakerClientBuilder;
import com.amazonaws.services.sagemaker.model.StartPipelineExecutionRequest;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
@ -186,16 +184,8 @@ public class SagemakerTask extends AbstractRemoteTask {
}
protected AmazonSageMaker createClient() {
final String awsAccessKeyId = parameters.getUsername();
final String awsSecretAccessKey = parameters.getPassword();
final String awsRegion = parameters.getAwsRegion();
final BasicAWSCredentials basicAWSCredentials = new BasicAWSCredentials(awsAccessKeyId, awsSecretAccessKey);
final AWSCredentialsProvider awsCredentialsProvider = new AWSStaticCredentialsProvider(basicAWSCredentials);
// create a SageMaker client
return AmazonSageMakerClientBuilder.standard()
.withCredentials(awsCredentialsProvider)
.withRegion(awsRegion)
.build();
Map<String, String> awsProperties = PropertyUtils.getByPrefix("aws.sagemaker.", "");
return AmazonSageMakerClientFactory.createAmazonSageMakerClient(awsProperties);
}
}

7
dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml

@ -57,6 +57,13 @@
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-authentication/dolphinscheduler-aws-authentication/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

7
pom.xml

@ -57,6 +57,7 @@
<module>dolphinscheduler-storage-plugin</module>
<module>dolphinscheduler-extract</module>
<module>dolphinscheduler-dao-plugin</module>
<module>dolphinscheduler-authentication</module>
</modules>
<properties>
@ -294,6 +295,12 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-aws-authentication</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.junit</groupId>
<artifactId>junit-bom</artifactId>

Loading…
Cancel
Save