From 7ee66f2d029b8d0d860b59d5dfe1e8224ccf6b44 Mon Sep 17 00:00:00 2001 From: Rick Cheng <38122586+rickchengx@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:49:33 +0800 Subject: [PATCH] [Feature-13429][Remote Logging] Add support for writing task logs to Google Cloud Storage (#13777) --- docs/docs/en/guide/remote-logging.md | 11 ++ docs/docs/zh/guide/remote-logging.md | 13 +- .../src/main/resources/logback-spring.xml | 1 + dolphinscheduler-common/pom.xml | 28 ++++ .../common/constants/Constants.java | 7 + .../log/remote/GcsRemoteLogHandler.java | 153 ++++++++++++++++++ .../log/remote/RemoteLogHandlerFactory.java | 2 + .../common/log/remote/RemoteLogUtils.java | 17 +- .../common/utils/LogUtils.java | 9 ++ .../src/main/resources/common.properties | 4 + .../log/remote/RemoteLogHandlerTest.java | 21 ++- .../src/main/resources/logback-spring.xml | 2 + .../src/main/resources/logback-spring.xml | 1 + .../src/main/resources/logback-spring.xml | 1 + 14 files changed, 255 insertions(+), 15 deletions(-) create mode 100644 dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java diff --git a/docs/docs/en/guide/remote-logging.md b/docs/docs/en/guide/remote-logging.md index 205c80405f..421b4b50a0 100644 --- a/docs/docs/en/guide/remote-logging.md +++ b/docs/docs/en/guide/remote-logging.md @@ -50,3 +50,14 @@ remote.logging.s3.endpoint= remote.logging.s3.region= ``` +## Writing task logs to [Google Cloud Storage (GCS)](https://cloud.google.com/storage) + +Configure `common.properties` as follows: + +```properties +# the location of the google cloud credential, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.credential=/path/to/credential +# gcs bucket name, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.bucket.name= +``` + diff --git a/docs/docs/zh/guide/remote-logging.md b/docs/docs/zh/guide/remote-logging.md index 9accca4bac..86e7f8219f 100644 --- a/docs/docs/zh/guide/remote-logging.md +++ b/docs/docs/zh/guide/remote-logging.md @@ -10,7 +10,7 @@ Apache DolphinScheduler支持将任务日志传输到远端存储上。当配置 ```properties # 是否开启远程日志存储 remote.logging.enable=true -# 任务日志写入的远端存储,目前支持OSS, S3 +# 任务日志写入的远端存储,目前支持OSS, S3, GCS remote.logging.target=OSS # 任务日志在远端存储上的目录 remote.logging.base.dir=logs @@ -50,3 +50,14 @@ remote.logging.s3.endpoint= remote.logging.s3.region= ``` +## 将任务日志写入[Google Cloud Storage (GCS)](https://cloud.google.com/storage) + +配置`common.propertis`如下: + +```properties +# the location of the google cloud credential, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.credential=/path/to/credential +# gcs bucket name, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.bucket.name= +``` + diff --git a/dolphinscheduler-api/src/main/resources/logback-spring.xml b/dolphinscheduler-api/src/main/resources/logback-spring.xml index d80fbf84d4..36ba43a497 100644 --- a/dolphinscheduler-api/src/main/resources/logback-spring.xml +++ b/dolphinscheduler-api/src/main/resources/logback-spring.xml @@ -18,6 +18,7 @@ + diff --git a/dolphinscheduler-common/pom.xml b/dolphinscheduler-common/pom.xml index a67425b51c..9e54f7da26 100644 --- a/dolphinscheduler-common/pom.xml +++ b/dolphinscheduler-common/pom.xml @@ -129,5 +129,33 @@ 6.1.26 test + + + com.google.cloud + google-cloud-storage + true + + + org.codehaus.mojo + animal-sniffer-annotations + + + io.perfmark + perfmark-api + + + com.google.api.grpc + proto-google-common-protos + + + com.google.protobuf + protobuf-java + + + com.google.protobuf + protobuf-java-util + + + diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java index ec28208dd3..940d5594ca 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java @@ -839,4 +839,11 @@ public final class Constants { public static final String REMOTE_LOGGING_S3_ENDPOINT = "remote.logging.s3.endpoint"; public static final String REMOTE_LOGGING_S3_REGION = "remote.logging.s3.region"; + + /** + * remote logging for GCS + */ + public static final String REMOTE_LOGGING_GCS_CREDENTIAL = "remote.logging.google.cloud.storage.credential"; + + public static final String REMOTE_LOGGING_GCS_BUCKET_NAME = "remote.logging.google.cloud.storage.bucket.name"; } diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java new file mode 100644 index 0000000000..20fd30336e --- /dev/null +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.common.log.remote; + +import org.apache.dolphinscheduler.common.constants.Constants; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; + +import org.apache.commons.lang3.StringUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +import lombok.extern.slf4j.Slf4j; + +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; + +@Slf4j +public class GcsRemoteLogHandler implements RemoteLogHandler, Closeable { + + private Storage gcsStorage; + + private String bucketName; + + private String credential; + + private static GcsRemoteLogHandler instance; + + private GcsRemoteLogHandler() { + + } + + public static synchronized GcsRemoteLogHandler getInstance() { + if (instance == null) { + instance = new GcsRemoteLogHandler(); + instance.init(); + } + + return instance; + } + + public void init() { + try { + credential = readCredentials(); + bucketName = readBucketName(); + gcsStorage = buildGcsStorage(credential); + + checkBucketNameExists(bucketName); + } catch (IOException e) { + log.error("GCS Remote Log Handler init failed", e); + } + } + + @Override + public void close() throws IOException { + try { + if (gcsStorage != null) { + gcsStorage.close(); + } + } catch (Exception e) { + throw new IOException(e); + } + } + + @Override + public void sendRemoteLog(String logPath) { + String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath); + + try { + log.info("send remote log {} to GCS {}", logPath, objectName); + + BlobInfo blobInfo = BlobInfo.newBuilder( + BlobId.of(bucketName, objectName)).build(); + + gcsStorage.create(blobInfo, Files.readAllBytes(Paths.get(logPath))); + } catch (Exception e) { + log.error("error while sending remote log {} to GCS {}", logPath, objectName, e); + } + } + + @Override + public void getRemoteLog(String logPath) { + String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath); + + try { + log.info("get remote log on GCS {} to {}", objectName, logPath); + + Blob blob = gcsStorage.get(BlobId.of(bucketName, objectName)); + blob.downloadTo(Paths.get(logPath)); + } catch (Exception e) { + log.error("error while getting remote log on GCS {} to {}", objectName, logPath, e); + } + } + + protected Storage buildGcsStorage(String credential) throws IOException { + return StorageOptions.newBuilder() + .setCredentials(ServiceAccountCredentials.fromStream( + Files.newInputStream(Paths.get(credential)))) + .build() + .getService(); + } + + protected String readCredentials() { + return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_CREDENTIAL); + } + + protected String readBucketName() { + return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME); + } + + public void checkBucketNameExists(String bucketName) { + if (StringUtils.isBlank(bucketName)) { + throw new IllegalArgumentException(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME + " is blank"); + } + + boolean exist = false; + for (Bucket bucket : gcsStorage.list().iterateAll()) { + if (bucketName.equals(bucket.getName())) { + exist = true; + break; + } + } + + if (!exist) { + log.error( + "bucketName: {} does not exist, you need to create them by yourself", bucketName); + } else { + log.info("bucketName: {} has been found", bucketName); + } + } +} diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java index 328f7768d8..73ab41a134 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java @@ -37,6 +37,8 @@ public class RemoteLogHandlerFactory { return OssRemoteLogHandler.getInstance(); } else if ("S3".equals(target)) { return S3RemoteLogHandler.getInstance(); + } else if ("GCS".equals(target)) { + return GcsRemoteLogHandler.getInstance(); } log.error("No suitable remote logging target for {}", target); diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java index 7df90a9dec..25d8024474 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java @@ -17,6 +17,8 @@ package org.apache.dolphinscheduler.common.log.remote; +import static org.apache.dolphinscheduler.common.utils.LogUtils.getLocalLogBaseDir; + import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.utils.PropertyUtils; @@ -36,8 +38,6 @@ public class RemoteLogUtils { private static RemoteLogService remoteLogService; - private static final int OBJECT_NAME_COUNT = 2; - @Autowired private RemoteLogService autowiredRemoteLogService; @@ -79,16 +79,13 @@ public class RemoteLogUtils { } public static String getObjectNameFromLogPath(String logPath) { + Path localLogBaseDirPath = Paths.get(getLocalLogBaseDir()).toAbsolutePath(); + Path path = Paths.get(logPath); int nameCount = path.getNameCount(); - String logBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR); - - if (nameCount < OBJECT_NAME_COUNT) { - return Paths.get(logBaseDir, logPath).toString(); - } else { - return Paths.get(logBaseDir, path.subpath(nameCount - OBJECT_NAME_COUNT, nameCount).toString()) - .toString(); - } + String remoteLogBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR); + return Paths.get(remoteLogBaseDir, path.subpath(localLogBaseDirPath.getNameCount(), nameCount).toString()) + .toString(); } } diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java index 47067f2845..b1337f1c8a 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java @@ -35,6 +35,10 @@ import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; +import org.slf4j.LoggerFactory; + +import ch.qos.logback.classic.LoggerContext; + @Slf4j public class LogUtils { @@ -160,4 +164,9 @@ public class LogUtils { return builder.toString(); } + public static String getLocalLogBaseDir() { + LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); + return loggerContext.getProperty("log.base.ctx"); + } + } diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 7dff4f63e1..90c4d82ded 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -173,4 +173,8 @@ remote.logging.s3.bucket.name= remote.logging.s3.endpoint= # s3 region, required if you set remote.logging.target=S3 remote.logging.s3.region= +# the location of the google cloud credential, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.credential=/path/to/credential +# gcs bucket name, required if you set remote.logging.target=GCS +remote.logging.google.cloud.storage.bucket.name= diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java index 6513f0c190..b91c8807b7 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java +++ b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java @@ -18,8 +18,14 @@ package org.apache.dolphinscheduler.common.log.remote; import org.apache.dolphinscheduler.common.constants.Constants; +import org.apache.dolphinscheduler.common.utils.LogUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; +import java.nio.file.Path; +import java.nio.file.Paths; + +import lombok.extern.slf4j.Slf4j; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -27,20 +33,27 @@ import org.mockito.MockedStatic; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +@Slf4j @ExtendWith(MockitoExtension.class) public class RemoteLogHandlerTest { @Test public void testGetObjectNameFromLogPath() { - final String logPath = "/path/to/dolphinscheduler/logs/20230116/8245922982496_1-1-3.log"; - final String expectedObjectName = "logs/20230116/8245922982496_1-1-3.log"; + Path currentRelativePath = Paths.get(""); + String currentDir = currentRelativePath.toAbsolutePath().toString(); + final String logPath = currentDir + "/logs/20230116/8245922982496/1/1/1.log"; + log.info("logPath is: {}", logPath); - try (MockedStatic propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class)) { + final String expectedObjectName = "logs/20230116/8245922982496/1/1/1.log"; + + try ( + MockedStatic propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class); + MockedStatic remoteLogUtilsMockedStatic = Mockito.mockStatic(LogUtils.class)) { propertyUtilsMockedStatic.when(() -> PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR)) .thenReturn("logs"); + remoteLogUtilsMockedStatic.when(LogUtils::getLocalLogBaseDir).thenReturn("logs"); String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath); - Assertions.assertEquals(expectedObjectName, objectName); } } diff --git a/dolphinscheduler-master/src/main/resources/logback-spring.xml b/dolphinscheduler-master/src/main/resources/logback-spring.xml index deb791fae2..86bad555ef 100644 --- a/dolphinscheduler-master/src/main/resources/logback-spring.xml +++ b/dolphinscheduler-master/src/main/resources/logback-spring.xml @@ -18,6 +18,8 @@ + + diff --git a/dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml b/dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml index 90f7290e15..bc2d3a405f 100644 --- a/dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml +++ b/dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml @@ -18,6 +18,7 @@ + diff --git a/dolphinscheduler-worker/src/main/resources/logback-spring.xml b/dolphinscheduler-worker/src/main/resources/logback-spring.xml index d63ea4a0b5..a5172ae817 100644 --- a/dolphinscheduler-worker/src/main/resources/logback-spring.xml +++ b/dolphinscheduler-worker/src/main/resources/logback-spring.xml @@ -18,6 +18,7 @@ +