
[feature][Resource] Add support for Azure Blob Storage in the resource center (#14082)

Branch: 3.2.1-prepare
Author: Rick Cheng (1 year ago, committed by GitHub)
Commit: 4fa0ab4d7e
27 changed files (changed line count in parentheses):

  1. deploy/kubernetes/dolphinscheduler/values.yaml (2)
  2. docs/docs/en/architecture/configuration.md (2)
  3. docs/docs/en/guide/installation/kubernetes.md (2)
  4. docs/docs/en/guide/remote-logging.md (16)
  5. docs/docs/en/guide/resource/configuration.md (2)
  6. docs/docs/zh/architecture/configuration.md (2)
  7. docs/docs/zh/guide/installation/kubernetes.md (2)
  8. docs/docs/zh/guide/remote-logging.md (16)
  9. docs/docs/zh/guide/resource/configuration.md (2)
  10. dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties (2)
  11. dolphinscheduler-bom/pom.xml (9)
  12. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (6)
  13. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/ResUploadType.java (2)
  14. dolphinscheduler-common/src/main/resources/common.properties (9)
  15. dolphinscheduler-dist/release-docs/LICENSE (16)
  16. dolphinscheduler-dist/release-docs/licenses/LICENSE-azure-storage-blob.txt (14)
  17. dolphinscheduler-dist/release-docs/licenses/LICENSE-azure-storage-internal-avro.txt (14)
  18. dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties (2)
  19. dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/pom.xml (47)
  20. dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java (488)
  21. dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperatorFactory.java (40)
  22. dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/test/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperatorTest.java (274)
  23. dolphinscheduler-storage-plugin/dolphinscheduler-storage-all/pom.xml (5)
  24. dolphinscheduler-storage-plugin/dolphinscheduler-storage-api/src/main/java/org/apache/dolphinscheduler/plugin/storage/api/StorageType.java (4)
  25. dolphinscheduler-storage-plugin/pom.xml (1)
  26. dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties (2)
  27. tools/dependencies/known-dependencies.txt (16)

deploy/kubernetes/dolphinscheduler/values.yaml (2)

@@ -104,7 +104,7 @@ conf:
 # user data local directory path, please make sure the directory exists and have read write permissions
 data.basedir.path: /tmp/dolphinscheduler
-# resource storage type: HDFS, S3, OSS, GCS, NONE
+# resource storage type: HDFS, S3, OSS, GCS, ABS, NONE
 resource.storage.type: S3
 # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended

docs/docs/en/architecture/configuration.md (2)

@@ -197,7 +197,7 @@ The default configuration is as follows:
 | Parameters | Default value | Description |
 |--|--|--|
 |data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files|
-|resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, NONE|
+|resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE|
 |resource.upload.path | /dolphinscheduler | storage path of resource files|
 |aws.access.key.id | minioadmin | access key id of S3|
 |aws.secret.access.key | minioadmin | secret access key of S3|

docs/docs/en/guide/installation/kubernetes.md (2)

@@ -553,7 +553,7 @@ common:
 | | | |
 | `common.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for dolphinscheduler, suitable for all servers | `""` |
 | `common.configmap.DATA_BASEDIR_PATH` | User data directory path, self configuration, please make sure the directory exists and have read write permissions | `/tmp/dolphinscheduler` |
-| `common.configmap.RESOURCE_STORAGE_TYPE` | Resource storage type: HDFS, S3, OSS, GCS, NONE | `HDFS` |
+| `common.configmap.RESOURCE_STORAGE_TYPE` | Resource storage type: HDFS, S3, OSS, GCS, ABS, NONE | `HDFS` |
 | `common.configmap.RESOURCE_UPLOAD_PATH` | Resource store on HDFS/S3 path, please make sure the directory exists on hdfs and have read write permissions | `/dolphinscheduler` |
 | `common.configmap.FS_DEFAULT_FS` | Resource storage file system like `file:///`, `hdfs://mycluster:8020` or `s3a://dolphinscheduler` | `file:///` |
 | `common.configmap.FS_S3A_ENDPOINT` | S3 endpoint when `common.configmap.RESOURCE_STORAGE_TYPE` is set to `S3` | `s3.xxx.amazonaws.com` |

docs/docs/en/guide/remote-logging.md (16)

@@ -61,3 +61,19 @@ remote.logging.google.cloud.storage.credential=/path/to/credential
 remote.logging.google.cloud.storage.bucket.name=<your-bucket>
 ```
+## Writing task logs to [Azure Blob Storage (ABS)](https://azure.microsoft.com/en-us/products/storage/blobs)
+Configure `common.properties` as follows:
+```properties
+# abs container name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.container.name=<your-container>
+# abs account name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.account.name=<your-account-name>
+# abs connection string, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.connection.string=<your-connection-string>
+```
+### Notice
+Because Azure Blob Storage does not allow empty directories to exist on their own, an empty placeholder file named `<no name>` appears under each resource directory. This does not affect how files are displayed in the DolphinScheduler resource center.
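The `<no name>` placeholder comes from the way the new plugin's `mkdir` emulates directories (see `AbsStorageOperator` later in this diff): since ABS has no real directories, it uploads a zero-byte blob whose name ends with `/`. A minimal standalone sketch of that trick against the `azure-storage-blob` client, with placeholder connection values (not part of this commit):

```java
import java.io.ByteArrayInputStream;

import com.azure.storage.blob.BlobClient;
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.BlobServiceClientBuilder;

public class AbsDirectoryMarkerSketch {

    public static void main(String[] args) {
        // Placeholder credentials; substitute your own account values.
        BlobContainerClient container = new BlobServiceClientBuilder()
                .connectionString("<your-connection-string>")
                .buildClient()
                .getBlobContainerClient("<your-container>");

        // A trailing "/" in the blob name is what gets treated as a directory.
        String directoryMarker = "dolphinscheduler/default/resources/";
        BlobClient marker = container.getBlobClient(directoryMarker);
        if (!marker.exists()) {
            // Zero-byte upload: this is the blob that shows up as `<no name>`.
            marker.upload(new ByteArrayInputStream(new byte[0]), 0);
        }
    }
}
```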

docs/docs/en/guide/resource/configuration.md (2)

@@ -80,7 +80,7 @@ data.basedir.path=/tmp/dolphinscheduler
 # resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
-# resource storage type: HDFS, S3, OSS, GCS, NONE
+# resource storage type: HDFS, S3, OSS, GCS, ABS, NONE
 resource.storage.type=NONE
 # resource store on HDFS/S3/OSS path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
 resource.storage.upload.base.path=/tmp/dolphinscheduler

docs/docs/zh/architecture/configuration.md (2)

@@ -194,7 +194,7 @@ The common.properties file currently mainly configures settings related to hadoop/s3/yarn/applicationId collection
 | Parameter | Default value | Description |
 |--|--|--|
 |data.basedir.path | /tmp/dolphinscheduler | local working directory, used to store temporary files|
-|resource.storage.type | NONE | resource file storage type: HDFS, S3, OSS, GCS, NONE|
+|resource.storage.type | NONE | resource file storage type: HDFS, S3, OSS, GCS, ABS, NONE|
 |resource.upload.path | /dolphinscheduler | resource file storage path|
 |aws.access.key.id | minioadmin | S3 access key|
 |aws.secret.access.key | minioadmin | S3 secret access key|

docs/docs/zh/guide/installation/kubernetes.md (2)

@@ -552,7 +552,7 @@ common:
 | | | |
 | `common.configmap.DOLPHINSCHEDULER_OPTS` | The jvm options for dolphinscheduler, suitable for all servers | `""` |
 | `common.configmap.DATA_BASEDIR_PATH` | User data directory path, self configuration, please make sure the directory exists and have read write permissions | `/tmp/dolphinscheduler` |
-| `common.configmap.RESOURCE_STORAGE_TYPE` | Resource storage type: HDFS, S3, OSS, GCS, NONE | `HDFS` |
+| `common.configmap.RESOURCE_STORAGE_TYPE` | Resource storage type: HDFS, S3, OSS, GCS, ABS, NONE | `HDFS` |
 | `common.configmap.RESOURCE_UPLOAD_PATH` | Resource store on HDFS/S3 path, please make sure the directory exists on hdfs and have read write permissions | `/dolphinscheduler` |
 | `common.configmap.FS_DEFAULT_FS` | Resource storage file system like `file:///`, `hdfs://mycluster:8020` or `s3a://dolphinscheduler` | `file:///` |
 | `common.configmap.FS_S3A_ENDPOINT` | S3 endpoint when `common.configmap.RESOURCE_STORAGE_TYPE` is set to `S3` | `s3.xxx.amazonaws.com` |

docs/docs/zh/guide/remote-logging.md (16)

@@ -61,3 +61,19 @@ remote.logging.google.cloud.storage.credential=/path/to/credential
 remote.logging.google.cloud.storage.bucket.name=<your-bucket>
 ```
+## Writing task logs to [Azure Blob Storage (ABS)](https://azure.microsoft.com/en-us/products/storage/blobs)
+Configure `common.properties` as follows:
+```properties
+# abs container name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.container.name=<your-container>
+# abs account name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.account.name=<your-account-name>
+# abs connection string, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.connection.string=<your-connection-string>
+```
+### Notice
+Because Azure Blob Storage does not allow empty directories to exist on their own, an empty placeholder file named `<no name>` appears under each resource directory. This does not affect how files are displayed in the DolphinScheduler resource center.

docs/docs/zh/guide/resource/configuration.md (2)

@@ -79,7 +79,7 @@ resource.aws.s3.endpoint=
 # user data local directory path, please make sure the directory exists and have read write permissions
 data.basedir.path=/tmp/dolphinscheduler
-# resource storage type: LOCAL, HDFS, S3, OSS, GCS
+# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS
 resource.storage.type=LOCAL
 # resource store on HDFS/S3/OSS path, resource file will store to this hadoop hdfs path, self configuration,

dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties (2)

@@ -21,7 +21,7 @@ data.basedir.path=/tmp/dolphinscheduler
 # resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
-# resource storage type: HDFS, S3, OSS, GCS, NONE
+# resource storage type: HDFS, S3, OSS, GCS, ABS, NONE
 resource.storage.type=S3
 # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
 resource.storage.upload.base.path=/dolphinscheduler

dolphinscheduler-bom/pom.xml (9)

@@ -113,6 +113,7 @@
 <sshd.version>2.8.0</sshd.version>
 <fabric8.client.version>6.0.0</fabric8.client.version>
 <casdoor.version>1.6.0</casdoor.version>
+<azure-sdk-bom.version>1.2.10</azure-sdk-bom.version>
 </properties>
 <dependencyManagement>
@@ -700,6 +701,14 @@
 <version>${google-cloud-storage.version}</version>
 </dependency>
+<dependency>
+<groupId>com.azure</groupId>
+<artifactId>azure-sdk-bom</artifactId>
+<version>${azure-sdk-bom.version}</version>
+<type>pom</type>
+<scope>import</scope>
+</dependency>
 <dependency>
 <groupId>joda-time</groupId>
 <artifactId>joda-time</artifactId>

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (6)

@@ -141,6 +141,12 @@ public final class Constants {
 public static final String GOOGLE_CLOUD_STORAGE_CREDENTIAL = "resource.google.cloud.storage.credential";
+public static final String AZURE_BLOB_STORAGE_CONNECTION_STRING = "resource.azure.blob.storage.connection.string";
+public static final String AZURE_BLOB_STORAGE_CONTAINER_NAME = "resource.azure.blob.storage.container.name";
+public static final String AZURE_BLOB_STORAGE_ACCOUNT_NAME = "resource.azure.blob.storage.account.name";
 /**
 * fetch applicationId way
 */

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/ResUploadType.java (2)

@@ -21,5 +21,5 @@ package org.apache.dolphinscheduler.common.enums;
 * data base types
 */
 public enum ResUploadType {
-LOCAL, HDFS, S3, OSS, GCS, NONE
+LOCAL, HDFS, S3, OSS, GCS, ABS, NONE
 }

dolphinscheduler-common/src/main/resources/common.properties (9)

@@ -21,7 +21,7 @@ data.basedir.path=/tmp/dolphinscheduler
 # resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
-# resource storage type: LOCAL, HDFS, S3, OSS, GCS, NONE. LOCAL type is a specific type of HDFS with "resource.hdfs.fs.defaultFS = file:///" configuration
+# resource storage type: LOCAL, HDFS, S3, OSS, GCS, ABS, NONE. LOCAL type is a specific type of HDFS with "resource.hdfs.fs.defaultFS = file:///" configuration
 # please notice that LOCAL mode does not support reading and writing in distributed mode, which mean you can only use your resource in one machine, unless
 # use shared file mount point
 resource.storage.type=LOCAL
@@ -66,6 +66,13 @@ resource.google.cloud.storage.credential=/path/to/credential
 # gcs bucket name, required if you set resource.storage.type=GCS
 resource.google.cloud.storage.bucket.name=<your-bucket>
+# abs container name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.container.name=<your-container>
+# abs account name, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.account.name=<your-account-name>
+# abs connection string, required if you set resource.storage.type=ABS
+resource.azure.blob.storage.connection.string=<your-connection-string>
 # if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path
 resource.hdfs.root.user=hdfs
 # if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir

dolphinscheduler-dist/release-docs/LICENSE (16, vendored)

@@ -652,8 +652,8 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 bcprov-jdk15on 1.68: https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk15on, MIT
 reactive-streams 1.0.4: https://mvnrepository.com/artifact/org.reactivestreams/reactive-streams/1.0.4, MIT
 webjars-locator-core 0.50: https://mvnrepository.com/artifact/org.webjars/webjars-locator-core/0.50, MIT
-azure-core 1.34.0: https://mvnrepository.com/artifact/com.azure/azure-core/1.34.0, MIT
-azure-core-http-netty 1.12.7: https://mvnrepository.com/artifact/com.azure/azure-core-http-netty/1.12.7, MIT
+azure-core 1.36.0: https://mvnrepository.com/artifact/com.azure/azure-core/1.36.0, MIT
+azure-core-http-netty 1.13.0: https://mvnrepository.com/artifact/com.azure/azure-core-http-netty/1.13.0, MIT
 azure-identity 1.7.1: https://mvnrepository.com/artifact/com.azure/azure-identity/1.7.1, MIT
 msal4j 1.13.3: https://mvnrepository.com/artifact/com.microsoft.azure/msal4j/1.13.3, MIT
 msal4j-persistence-extension 1.1.0: https://mvnrepository.com/artifact/com.microsoft.azure/msal4j-persistence-extension/1.1.0, MIT
@@ -682,11 +682,13 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 azure-resourcemanager-sql 2.21.0: https://mvnrepository.com/artifact/com.azure.resourcemanager/azure-resourcemanager-sql/2.21.0, MIT
 azure-resourcemanager-storage 2.21.0: https://mvnrepository.com/artifact/com.azure.resourcemanager/azure-resourcemanager-storage/2.21.0, MIT
 azure-resourcemanager-trafficmanager 2.21.0: https://mvnrepository.com/artifact/com.azure.resourcemanager/azure-resourcemanager-trafficmanager/2.21.0, MIT
-azure-security-keyvault-keys 4.5.2: https://mvnrepository.com/artifact/com.azure/azure-security-keyvault-keys/4.5.2, MIT
-azure-security-keyvault-secrets 4.5.2: https://mvnrepository.com/artifact/com.azure/azure-security-keyvault-secrets/4.5.2, MIT
-azure-storage-common 12.19.1: https://mvnrepository.com/artifact/com.azure/azure-storage-common/12.19.1, MIT
-azure-storage-file-share 12.16.1: https://mvnrepository.com/artifact/com.azure/azure-storage-file-share/12.16.1, MIT
-azure-core-management 1.9.0: https://mvnrepository.com/artifact/com.azure/azure-core-management/1.9.0, MIT
+azure-security-keyvault-keys 4.5.4: https://mvnrepository.com/artifact/com.azure/azure-security-keyvault-keys/4.5.4, MIT
+azure-security-keyvault-secrets 4.5.4: https://mvnrepository.com/artifact/com.azure/azure-security-keyvault-secrets/4.5.4, MIT
+azure-storage-common 12.20.0: https://mvnrepository.com/artifact/com.azure/azure-storage-common/12.20.0, MIT
+azure-storage-file-share 12.17.0: https://mvnrepository.com/artifact/com.azure/azure-storage-file-share/12.17.0, MIT
+azure-core-management 1.10.1: https://mvnrepository.com/artifact/com.azure/azure-core-management/1.10.1, MIT
+azure-storage-blob 12.21.0: https://mvnrepository.com/artifact/com.azure/azure-storage-blob/12.21.0, MIT
+azure-storage-internal-avro 12.6.0: https://mvnrepository.com/artifact/com.azure/azure-storage-internal-avro/12.6.0, MIT
 ========================================================================
 MPL 1.1 licenses
========================================================================
MPL 1.1 licenses

dolphinscheduler-dist/release-docs/licenses/LICENSE-azure-storage-blob.txt (14, vendored)

@@ -0,0 +1,14 @@
Copyright(c) 2016 Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"),
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and / or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions :
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

dolphinscheduler-dist/release-docs/licenses/LICENSE-azure-storage-internal-avro.txt (14, vendored)

@@ -0,0 +1,14 @@
Copyright(c) 2016 Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"),
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and / or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions :
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties (2)

@@ -21,7 +21,7 @@ data.basedir.path=/tmp/dolphinscheduler
 # resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
-# resource storage type: HDFS, S3, OSS, GCS, NONE
+# resource storage type: HDFS, S3, OSS, GCS, ABS, NONE
 resource.storage.type=S3
 # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
 resource.storage.upload.base.path=/dolphinscheduler

dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/pom.xml (47)

@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-storage-plugin</artifactId>
<version>dev-SNAPSHOT</version>
</parent>
<artifactId>dolphinscheduler-storage-abs</artifactId>
<dependencies>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-storage-blob</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-storage-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-task-api</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>

dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java (488)

@@ -0,0 +1,488 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.storage.abs;
import static org.apache.dolphinscheduler.common.constants.Constants.EMPTY_STRING;
import static org.apache.dolphinscheduler.common.constants.Constants.FOLDER_SEPARATOR;
import static org.apache.dolphinscheduler.common.constants.Constants.FORMAT_S_S;
import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_FILE;
import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_TYPE_UDF;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.enums.ResUploadType;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity;
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate;
import org.apache.dolphinscheduler.spi.enums.ResourceType;
import org.apache.commons.lang3.StringUtils;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Date;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import com.azure.core.http.rest.PagedIterable;
import com.azure.storage.blob.BlobClient;
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.BlobServiceClient;
import com.azure.storage.blob.BlobServiceClientBuilder;
import com.azure.storage.blob.models.BlobContainerItem;
import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.specialized.BlockBlobClient;
@Data
@Slf4j
public class AbsStorageOperator implements Closeable, StorageOperate {
private BlobContainerClient blobContainerClient;
private BlobServiceClient blobServiceClient;
private String connectionString;
private String storageAccountName;
private String containerName;
public AbsStorageOperator() {
}
public void init() {
containerName = readContainerName();
connectionString = readConnectionString();
storageAccountName = readAccountName();
blobServiceClient = buildBlobServiceClient();
blobContainerClient = buildBlobContainerClient();
checkContainerNameExists();
}
protected BlobServiceClient buildBlobServiceClient() {
return new BlobServiceClientBuilder()
.endpoint("https://" + storageAccountName + ".blob.core.windows.net/")
.connectionString(connectionString)
.buildClient();
}
protected BlobContainerClient buildBlobContainerClient() {
return blobServiceClient.getBlobContainerClient(containerName);
}
protected String readConnectionString() {
return PropertyUtils.getString(Constants.AZURE_BLOB_STORAGE_CONNECTION_STRING);
}
protected String readContainerName() {
return PropertyUtils.getString(Constants.AZURE_BLOB_STORAGE_CONTAINER_NAME);
}
protected String readAccountName() {
return PropertyUtils.getString(Constants.AZURE_BLOB_STORAGE_ACCOUNT_NAME);
}
@Override
public void createTenantDirIfNotExists(String tenantCode) throws Exception {
mkdir(tenantCode, getAbsResDir(tenantCode));
mkdir(tenantCode, getAbsUdfDir(tenantCode));
}
public String getAbsResDir(String tenantCode) {
return String.format("%s/" + RESOURCE_TYPE_FILE, getAbsTenantDir(tenantCode));
}
public String getAbsUdfDir(String tenantCode) {
return String.format("%s/" + RESOURCE_TYPE_UDF, getAbsTenantDir(tenantCode));
}
public String getAbsTenantDir(String tenantCode) {
return String.format(FORMAT_S_S, getAbsDataBasePath(), tenantCode);
}
public String getAbsDataBasePath() {
if (FOLDER_SEPARATOR.equals(RESOURCE_UPLOAD_PATH)) {
return EMPTY_STRING;
} else {
return RESOURCE_UPLOAD_PATH.replaceFirst(FOLDER_SEPARATOR, EMPTY_STRING);
}
}
@Override
public String getResDir(String tenantCode) {
return getAbsResDir(tenantCode) + FOLDER_SEPARATOR;
}
@Override
public String getUdfDir(String tenantCode) {
return getAbsUdfDir(tenantCode) + FOLDER_SEPARATOR;
}
@Override
public String getResourceFileName(String tenantCode, String fullName) {
String resDir = getResDir(tenantCode);
return fullName.replaceFirst(resDir, "");
}
@Override
public String getResourceFullName(String tenantCode, String fileName) {
if (fileName.startsWith(FOLDER_SEPARATOR)) {
fileName = fileName.replaceFirst(FOLDER_SEPARATOR, EMPTY_STRING);
}
return String.format(FORMAT_S_S, getAbsResDir(tenantCode), fileName);
}
@Override
public String getFileName(ResourceType resourceType, String tenantCode, String fileName) {
if (fileName.startsWith(FOLDER_SEPARATOR)) {
fileName = fileName.replaceFirst(FOLDER_SEPARATOR, EMPTY_STRING);
}
return getDir(resourceType, tenantCode) + fileName;
}
@Override
public void download(String tenantCode, String srcFilePath, String dstFilePath,
boolean overwrite) throws IOException {
File dstFile = new File(dstFilePath);
if (dstFile.isDirectory()) {
Files.delete(dstFile.toPath());
} else {
Files.createDirectories(dstFile.getParentFile().toPath());
}
BlobClient blobClient = blobContainerClient.getBlobClient(srcFilePath);
blobClient.downloadToFile(dstFilePath, true);
}
@Override
public boolean exists(String fullName) throws IOException {
return isObjectExists(fullName);
}
protected boolean isObjectExists(String objectName) {
return blobContainerClient.getBlobClient(objectName).exists();
}
@Override
public boolean delete(String filePath, boolean recursive) throws IOException {
try {
if (isObjectExists(filePath)) {
blobContainerClient.getBlobClient(filePath).delete();
}
return true;
} catch (Exception e) {
log.error("delete the object error,the resource path is {}", filePath);
return false;
}
}
@Override
public boolean delete(String fullName, List<String> childrenPathList, boolean recursive) throws IOException {
// append the resource fullName to the list for deletion.
childrenPathList.add(fullName);
boolean result = true;
for (String filePath : childrenPathList) {
if (!delete(filePath, recursive)) {
result = false;
}
}
return result;
}
@Override
public boolean copy(String srcPath, String dstPath, boolean deleteSource, boolean overwrite) throws IOException {
BlobClient srcBlobClient = blobContainerClient.getBlobClient(srcPath);
BlockBlobClient dstBlobClient = blobContainerClient.getBlobClient(dstPath).getBlockBlobClient();
dstBlobClient.uploadFromUrl(srcBlobClient.getBlobUrl(), overwrite);
if (deleteSource) {
srcBlobClient.delete();
}
return true;
}
@Override
public boolean upload(String tenantCode, String srcFile, String dstPath, boolean deleteSource,
boolean overwrite) throws IOException {
try {
BlobClient blobClient = blobContainerClient.getBlobClient(dstPath);
blobClient.uploadFromFile(srcFile, overwrite);
Path srcPath = Paths.get(srcFile);
if (deleteSource) {
Files.delete(srcPath);
}
return true;
} catch (Exception e) {
log.error("upload failed,the container is {},the filePath is {}", containerName, dstPath);
return false;
}
}
@Override
public List<String> vimFile(String tenantCode, String filePath, int skipLineNums, int limit) throws IOException {
if (StringUtils.isBlank(filePath)) {
log.error("file path:{} is blank", filePath);
return Collections.emptyList();
}
BlobClient blobClient = blobContainerClient.getBlobClient(filePath);
try (
BufferedReader bufferedReader =
new BufferedReader(new InputStreamReader(
new ByteArrayInputStream(blobClient.downloadContent().toBytes())))) {
Stream<String> stream = bufferedReader.lines().skip(skipLineNums).limit(limit);
return stream.collect(Collectors.toList());
}
}
@Override
public void deleteTenant(String tenantCode) throws Exception {
deleteTenantCode(tenantCode);
}
protected void deleteTenantCode(String tenantCode) {
deleteDirectory(getResDir(tenantCode));
deleteDirectory(getUdfDir(tenantCode));
}
@Override
public String getDir(ResourceType resourceType, String tenantCode) {
switch (resourceType) {
case UDF:
return getUdfDir(tenantCode);
case FILE:
return getResDir(tenantCode);
case ALL:
return getAbsDataBasePath();
default:
return EMPTY_STRING;
}
}
protected void deleteDirectory(String directoryName) {
if (isObjectExists(directoryName)) {
blobContainerClient.getBlobClient(directoryName).delete();
}
}
@Override
public boolean mkdir(String tenantCode, String path) throws IOException {
String objectName = path + FOLDER_SEPARATOR;
if (!isObjectExists(objectName)) {
BlobClient blobClient = blobContainerClient.getBlobClient(objectName);
blobClient.upload(new ByteArrayInputStream(EMPTY_STRING.getBytes()), 0);
}
return true;
}
@Override
public void close() throws IOException {
}
@Override
public ResUploadType returnStorageType() {
return ResUploadType.ABS;
}
@Override
public List<StorageEntity> listFilesStatusRecursively(String path, String defaultPath, String tenantCode,
ResourceType type) {
List<StorageEntity> storageEntityList = new ArrayList<>();
LinkedList<StorageEntity> foldersToFetch = new LinkedList<>();
StorageEntity initialEntity = null;
try {
initialEntity = getFileStatus(path, defaultPath, tenantCode, type);
} catch (Exception e) {
log.error("error while listing files status recursively, path: {}", path, e);
return storageEntityList;
}
foldersToFetch.add(initialEntity);
while (!foldersToFetch.isEmpty()) {
String pathToExplore = foldersToFetch.pop().getFullName();
try {
List<StorageEntity> tempList = listFilesStatus(pathToExplore, defaultPath, tenantCode, type);
for (StorageEntity temp : tempList) {
if (temp.isDirectory()) {
foldersToFetch.add(temp);
}
}
storageEntityList.addAll(tempList);
} catch (Exception e) {
log.error("error while listing files stat:wus recursively, path: {}", pathToExplore, e);
}
}
return storageEntityList;
}
@Override
public List<StorageEntity> listFilesStatus(String path, String defaultPath, String tenantCode,
ResourceType type) throws Exception {
List<StorageEntity> storageEntityList = new ArrayList<>();
PagedIterable<BlobItem> blobItems;
blobItems = blobContainerClient.listBlobsByHierarchy(path);
if (blobItems == null) {
return storageEntityList;
}
for (BlobItem blobItem : blobItems) {
if (path.equals(blobItem.getName())) {
continue;
}
if (blobItem.isPrefix()) {
String suffix = StringUtils.difference(path, blobItem.getName());
String fileName = StringUtils.difference(defaultPath, blobItem.getName());
StorageEntity entity = new StorageEntity();
entity.setAlias(suffix);
entity.setFileName(fileName);
entity.setFullName(blobItem.getName());
entity.setDirectory(true);
entity.setDescription(EMPTY_STRING);
entity.setUserName(tenantCode);
entity.setType(type);
entity.setSize(0);
entity.setCreateTime(null);
entity.setUpdateTime(null);
entity.setPfullName(path);
storageEntityList.add(entity);
} else {
String[] aliasArr = blobItem.getName().split("/");
String alias = aliasArr[aliasArr.length - 1];
String fileName = StringUtils.difference(defaultPath, blobItem.getName());
StorageEntity entity = new StorageEntity();
entity.setAlias(alias);
entity.setFileName(fileName);
entity.setFullName(blobItem.getName());
entity.setDirectory(false);
entity.setDescription(EMPTY_STRING);
entity.setUserName(tenantCode);
entity.setType(type);
entity.setSize(blobItem.getProperties().getContentLength());
entity.setCreateTime(Date.from(blobItem.getProperties().getCreationTime().toInstant()));
entity.setUpdateTime(Date.from(blobItem.getProperties().getLastModified().toInstant()));
entity.setPfullName(path);
storageEntityList.add(entity);
}
}
return storageEntityList;
}
@Override
public StorageEntity getFileStatus(String path, String defaultPath, String tenantCode,
ResourceType type) throws Exception {
if (path.endsWith(FOLDER_SEPARATOR)) {
// the path is a directory that may or may not exist
String alias = findDirAlias(path);
String fileName = StringUtils.difference(defaultPath, path);
StorageEntity entity = new StorageEntity();
entity.setAlias(alias);
entity.setFileName(fileName);
entity.setFullName(path);
entity.setDirectory(true);
entity.setDescription(EMPTY_STRING);
entity.setUserName(tenantCode);
entity.setType(type);
entity.setSize(0);
return entity;
} else {
if (isObjectExists(path)) {
BlobClient blobClient = blobContainerClient.getBlobClient(path);
String[] aliasArr = blobClient.getBlobName().split(FOLDER_SEPARATOR);
String alias = aliasArr[aliasArr.length - 1];
String fileName = StringUtils.difference(defaultPath, blobClient.getBlobName());
StorageEntity entity = new StorageEntity();
entity.setAlias(alias);
entity.setFileName(fileName);
entity.setFullName(blobClient.getBlobName());
entity.setDirectory(false);
entity.setDescription(EMPTY_STRING);
entity.setUserName(tenantCode);
entity.setType(type);
entity.setSize(blobClient.getProperties().getBlobSize());
entity.setCreateTime(Date.from(blobClient.getProperties().getCreationTime().toInstant()));
entity.setUpdateTime(Date.from(blobClient.getProperties().getLastModified().toInstant()));
return entity;
} else {
throw new FileNotFoundException("Object is not found in ABS container: " + containerName);
}
}
}
private String findDirAlias(String dirPath) {
if (!dirPath.endsWith(FOLDER_SEPARATOR)) {
return dirPath;
}
Path path = Paths.get(dirPath);
return path.getName(path.getNameCount() - 1) + FOLDER_SEPARATOR;
}
public void checkContainerNameExists() {
if (StringUtils.isBlank(containerName)) {
throw new IllegalArgumentException(containerName + " is blank");
}
boolean exist = false;
for (BlobContainerItem item : blobServiceClient.listBlobContainers()) {
if (containerName.equals(item.getName())) {
exist = true;
break;
}
}
if (!exist) {
throw new IllegalArgumentException(
"containerName: " + containerName + " is not exists, you need to create them by yourself");
} else {
log.info("containerName: {} has been found", containerName);
}
}
}

dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperatorFactory.java (40)

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.storage.abs;
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate;
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperateFactory;
import org.apache.dolphinscheduler.plugin.storage.api.StorageType;
import com.google.auto.service.AutoService;
@AutoService(StorageOperateFactory.class)
public class AbsStorageOperatorFactory implements StorageOperateFactory {
@Override
public StorageOperate createStorageOperate() {
AbsStorageOperator absStorageOperator = new AbsStorageOperator();
absStorageOperator.init();
return absStorageOperator;
}
@Override
public StorageType getStorageOperate() {
return StorageType.ABS;
}
}
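The `@AutoService(StorageOperateFactory.class)` annotation generates the `META-INF/services` registration at compile time, so the factory can be found through Java's standard `ServiceLoader`. A minimal sketch of that lookup, assuming plain `ServiceLoader` discovery (DolphinScheduler's actual plugin bootstrap may wrap this differently):

```java
import java.util.ServiceLoader;

import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate;
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperateFactory;
import org.apache.dolphinscheduler.plugin.storage.api.StorageType;

public class StorageFactoryLookupSketch {

    public static void main(String[] args) {
        // Scan every factory registered under META-INF/services and pick the ABS one.
        for (StorageOperateFactory factory : ServiceLoader.load(StorageOperateFactory.class)) {
            if (factory.getStorageOperate() == StorageType.ABS) {
                // createStorageOperate() runs AbsStorageOperator.init(), which reads the
                // resource.azure.blob.storage.* properties and verifies the container exists.
                StorageOperate storageOperate = factory.createStorageOperate();
                System.out.println("Loaded ABS storage plugin: " + storageOperate.returnStorageType());
            }
        }
    }
}
```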

dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/test/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperatorTest.java (274)

@@ -0,0 +1,274 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.storage.abs;
import static org.apache.dolphinscheduler.common.constants.Constants.FOLDER_SEPARATOR;
import static org.apache.dolphinscheduler.common.constants.Constants.FORMAT_S_S;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity;
import org.apache.dolphinscheduler.spi.enums.ResourceType;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import com.azure.storage.blob.BlobClient;
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.BlobServiceClient;
import com.azure.storage.blob.specialized.BlockBlobClient;
@ExtendWith(MockitoExtension.class)
public class AbsStorageOperatorTest {
private static final String CONNECTION_STRING_MOCK = "CONNECTION_STRING_MOCK";
private static final String ACCOUNT_NAME_MOCK = "ACCOUNT_NAME_MOCK";
private static final String CONTAINER_NAME_MOCK = "CONTAINER_NAME_MOCK";
private static final String TENANT_CODE_MOCK = "TENANT_CODE_MOCK";
private static final String DIR_MOCK = "DIR_MOCK";
private static final String FILE_NAME_MOCK = "FILE_NAME_MOCK";
private static final String FILE_PATH_MOCK = "FILE_PATH_MOCK";
private static final String FULL_NAME = "/tmp/dir1/";
private static final String DEFAULT_PATH = "/tmp/";
@Mock
private BlobContainerClient blobContainerClient;
@Mock
private BlobServiceClient blobServiceClient;
@Mock
private BlockBlobClient blockBlobClient;
@Mock
private BlobClient blobClient;
private AbsStorageOperator absStorageOperator;
@BeforeEach
public void setUp() throws Exception {
absStorageOperator = Mockito.spy(AbsStorageOperator.class);
Mockito.doReturn(CONNECTION_STRING_MOCK).when(absStorageOperator).readConnectionString();
Mockito.doReturn(CONTAINER_NAME_MOCK).when(absStorageOperator).readContainerName();
Mockito.doReturn(ACCOUNT_NAME_MOCK).when(absStorageOperator).readAccountName();
Mockito.doReturn(blobContainerClient).when(absStorageOperator).buildBlobContainerClient();
Mockito.doReturn(blobServiceClient).when(absStorageOperator).buildBlobServiceClient();
Mockito.doNothing().when(absStorageOperator).checkContainerNameExists();
absStorageOperator.init();
}
@Test
public void testInit() throws Exception {
verify(absStorageOperator, times(1)).buildBlobServiceClient();
verify(absStorageOperator, times(1)).buildBlobContainerClient();
Assertions.assertEquals(CONNECTION_STRING_MOCK, absStorageOperator.getConnectionString());
Assertions.assertEquals(CONTAINER_NAME_MOCK, absStorageOperator.getContainerName());
Assertions.assertEquals(ACCOUNT_NAME_MOCK, absStorageOperator.getStorageAccountName());
}
@Test
public void createTenantResAndUdfDir() throws Exception {
doReturn(DIR_MOCK).when(absStorageOperator).getAbsResDir(TENANT_CODE_MOCK);
doReturn(DIR_MOCK).when(absStorageOperator).getAbsUdfDir(TENANT_CODE_MOCK);
doReturn(true).when(absStorageOperator).mkdir(TENANT_CODE_MOCK, DIR_MOCK);
absStorageOperator.createTenantDirIfNotExists(TENANT_CODE_MOCK);
verify(absStorageOperator, times(2)).mkdir(TENANT_CODE_MOCK, DIR_MOCK);
}
@Test
public void getResDir() {
final String expectedResourceDir = String.format("dolphinscheduler/%s/resources/", TENANT_CODE_MOCK);
final String dir = absStorageOperator.getResDir(TENANT_CODE_MOCK);
Assertions.assertEquals(expectedResourceDir, dir);
}
@Test
public void getUdfDir() {
final String expectedUdfDir = String.format("dolphinscheduler/%s/udfs/", TENANT_CODE_MOCK);
final String dir = absStorageOperator.getUdfDir(TENANT_CODE_MOCK);
Assertions.assertEquals(expectedUdfDir, dir);
}
@Test
public void mkdirWhenDirExists() {
boolean isSuccess = false;
try {
final String key = DIR_MOCK + FOLDER_SEPARATOR;
Mockito.doReturn(true).when(absStorageOperator).isObjectExists(key);
isSuccess = absStorageOperator.mkdir(TENANT_CODE_MOCK, DIR_MOCK);
} catch (IOException e) {
Assertions.fail("test failed due to unexpected IO exception");
}
Assertions.assertTrue(isSuccess);
}
@Test
public void getResourceFullName() {
final String expectedResourceFileName =
String.format("dolphinscheduler/%s/resources/%s", TENANT_CODE_MOCK, FILE_NAME_MOCK);
final String resourceFileName = absStorageOperator.getResourceFullName(TENANT_CODE_MOCK, FILE_NAME_MOCK);
Assertions.assertEquals(expectedResourceFileName, resourceFileName);
}
@Test
public void getFileName() {
final String expectedFileName =
String.format("dolphinscheduler/%s/resources/%s", TENANT_CODE_MOCK, FILE_NAME_MOCK);
final String fileName = absStorageOperator.getFileName(ResourceType.FILE, TENANT_CODE_MOCK, FILE_NAME_MOCK);
Assertions.assertEquals(expectedFileName, fileName);
}
@Test
public void exists() {
boolean doesExist = false;
doReturn(true).when(absStorageOperator).isObjectExists(FILE_NAME_MOCK);
try {
doesExist = absStorageOperator.exists(FILE_NAME_MOCK);
} catch (IOException e) {
Assertions.fail("unexpected IO exception in unit test");
}
Assertions.assertTrue(doesExist);
}
@Test
public void delete() {
boolean isDeleted = false;
doReturn(true).when(absStorageOperator).isObjectExists(FILE_NAME_MOCK);
Mockito.doReturn(blobClient).when(blobContainerClient).getBlobClient(Mockito.anyString());
try {
isDeleted = absStorageOperator.delete(FILE_NAME_MOCK, true);
} catch (IOException e) {
Assertions.fail("unexpected IO exception in unit test");
}
Assertions.assertTrue(isDeleted);
verify(blobClient, times(1)).delete();
}
@Test
public void copy() {
boolean isSuccess = false;
Mockito.doReturn(blobClient).when(blobContainerClient).getBlobClient(Mockito.anyString());
Mockito.doReturn(blockBlobClient).when(blobClient).getBlockBlobClient();
try {
isSuccess = absStorageOperator.copy(FILE_PATH_MOCK, FILE_PATH_MOCK, false, false);
} catch (IOException e) {
Assertions.fail("unexpected IO exception in unit test");
}
Assertions.assertTrue(isSuccess);
}
@Test
public void deleteTenant() {
doNothing().when(absStorageOperator).deleteTenantCode(anyString());
try {
absStorageOperator.deleteTenant(TENANT_CODE_MOCK);
} catch (Exception e) {
Assertions.fail("unexpected exception caught in unit test");
}
verify(absStorageOperator, times(1)).deleteTenantCode(anyString());
}
@Test
public void getAbsResDir() {
final String expectedAbsResDir = String.format("dolphinscheduler/%s/resources", TENANT_CODE_MOCK);
final String absResDir = absStorageOperator.getAbsResDir(TENANT_CODE_MOCK);
Assertions.assertEquals(expectedAbsResDir, absResDir);
}
@Test
public void getAbsUdfDir() {
final String expectedAbsUdfDir = String.format("dolphinscheduler/%s/udfs", TENANT_CODE_MOCK);
final String absUdfDir = absStorageOperator.getAbsUdfDir(TENANT_CODE_MOCK);
Assertions.assertEquals(expectedAbsUdfDir, absUdfDir);
}
@Test
public void getAbsTenantDir() {
final String expectedAbsTenantDir = String.format(FORMAT_S_S, DIR_MOCK, TENANT_CODE_MOCK);
doReturn(DIR_MOCK).when(absStorageOperator).getAbsDataBasePath();
final String absTenantDir = absStorageOperator.getAbsTenantDir(TENANT_CODE_MOCK);
Assertions.assertEquals(expectedAbsTenantDir, absTenantDir);
}
@Test
public void deleteDir() {
Mockito.doReturn(blobClient).when(blobContainerClient).getBlobClient(Mockito.anyString());
doReturn(true).when(absStorageOperator).isObjectExists(Mockito.any());
absStorageOperator.deleteDirectory(DIR_MOCK);
verify(blobClient, times(1)).delete();
}
@Test
public void testGetFileStatus() throws Exception {
StorageEntity entity =
absStorageOperator.getFileStatus(FULL_NAME, DEFAULT_PATH, TENANT_CODE_MOCK, ResourceType.FILE);
Assertions.assertEquals(FULL_NAME, entity.getFullName());
Assertions.assertEquals("dir1/", entity.getFileName());
}
@Test
public void testListFilesStatus() throws Exception {
Mockito.doReturn(null).when(blobContainerClient).listBlobsByHierarchy(Mockito.any());
List<StorageEntity> result =
absStorageOperator.listFilesStatus(FULL_NAME, DEFAULT_PATH, TENANT_CODE_MOCK, ResourceType.FILE);
verify(blobContainerClient, times(1)).listBlobsByHierarchy(Mockito.any());
}
@Test
public void testListFilesStatusRecursively() throws Exception {
StorageEntity entity = new StorageEntity();
entity.setFullName(FULL_NAME);
doReturn(entity).when(absStorageOperator).getFileStatus(FULL_NAME, DEFAULT_PATH, TENANT_CODE_MOCK,
ResourceType.FILE);
doReturn(Collections.EMPTY_LIST).when(absStorageOperator).listFilesStatus(anyString(), anyString(), anyString(),
Mockito.any(ResourceType.class));
List<StorageEntity> result =
absStorageOperator.listFilesStatusRecursively(FULL_NAME, DEFAULT_PATH, TENANT_CODE_MOCK,
ResourceType.FILE);
Assertions.assertEquals(0, result.size());
}
}

dolphinscheduler-storage-plugin/dolphinscheduler-storage-all/pom.xml (5)

@@ -52,6 +52,11 @@
 <artifactId>dolphinscheduler-storage-gcs</artifactId>
 <version>${project.version}</version>
 </dependency>
+<dependency>
+<groupId>org.apache.dolphinscheduler</groupId>
+<artifactId>dolphinscheduler-storage-abs</artifactId>
+<version>${project.version}</version>
+</dependency>
 </dependencies>
 </project>

dolphinscheduler-storage-plugin/dolphinscheduler-storage-api/src/main/java/org/apache/dolphinscheduler/plugin/storage/api/StorageType.java (4)

@@ -25,7 +25,9 @@ public enum StorageType {
 HDFS(1, "HDFS"),
 OSS(2, "OSS"),
 S3(3, "S3"),
-GCS(4, "GCS");
+GCS(4, "GCS"),
+ABS(5, "ABS");
 private final int code;
 private final String name;

dolphinscheduler-storage-plugin/pom.xml (1)

@@ -34,6 +34,7 @@
 <module>dolphinscheduler-storage-hdfs</module>
 <module>dolphinscheduler-storage-oss</module>
 <module>dolphinscheduler-storage-gcs</module>
+<module>dolphinscheduler-storage-abs</module>
 </modules>
 <dependencyManagement>

dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties (2)

@@ -21,7 +21,7 @@ data.basedir.path=/tmp/dolphinscheduler
 # resource view suffixs
 #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js
-# resource storage type: HDFS, S3, OSS, GCS, NONE
+# resource storage type: HDFS, S3, OSS, GCS, ABS, NONE
 resource.storage.type=NONE
 # resource store on HDFS/S3 path, resource file will store to this base path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
 resource.storage.upload.base.path=/dolphinscheduler

tools/dependencies/known-dependencies.txt (16)

@@ -371,8 +371,8 @@ jdom2-2.0.6.1.jar
 jettison-1.1.jar
 stax-api-1.0.1.jar
 trino-jdbc-402.jar
-azure-core-1.34.0.jar
-azure-core-http-netty-1.12.7.jar
+azure-core-1.36.0.jar
+azure-core-http-netty-1.13.0.jar
 azure-identity-1.7.1.jar
 content-type-2.2.jar
 jackson-dataformat-xml-2.13.3.jar
@@ -423,13 +423,13 @@ azure-resourcemanager-servicebus-2.21.0.jar
 azure-resourcemanager-sql-2.21.0.jar
 azure-resourcemanager-storage-2.21.0.jar
 azure-resourcemanager-trafficmanager-2.21.0.jar
-azure-security-keyvault-keys-4.5.2.jar
-azure-security-keyvault-secrets-4.5.2.jar
-azure-storage-common-12.19.1.jar
-azure-storage-file-share-12.16.1.jar
+azure-security-keyvault-keys-4.5.4.jar
+azure-security-keyvault-secrets-4.5.4.jar
+azure-storage-common-12.20.0.jar
+azure-storage-file-share-12.17.0.jar
 nimbus-jose-jwt-9.22.jar
 woodstox-core-6.4.0.jar
-azure-core-management-1.9.0.jar
+azure-core-management-1.10.1.jar
 api-common-2.6.0.jar
 auto-value-1.10.1.jar
 auto-value-annotations-1.10.1.jar
@@ -480,4 +480,6 @@ casdoor-spring-boot-starter-1.6.0.jar
 org.apache.oltu.oauth2.client-1.0.2.jar
 org.apache.oltu.oauth2.common-1.0.2.jar
 snowflake-jdbc-3.13.10.jar
+azure-storage-blob-12.21.0.jar
+azure-storage-internal-avro-12.6.0.jar
 vertica-jdbc-12.0.4-0.jar
