Browse Source

[Feature-13429][Remote Logging] Add support for writing task logs to Google Cloud Storage (#13777)

3.2.0-release
Rick Cheng 2 years ago committed by GitHub
parent
commit
7ee66f2d02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      docs/docs/en/guide/remote-logging.md
  2. 13
      docs/docs/zh/guide/remote-logging.md
  3. 1
      dolphinscheduler-api/src/main/resources/logback-spring.xml
  4. 28
      dolphinscheduler-common/pom.xml
  5. 7
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java
  6. 153
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java
  7. 2
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java
  8. 17
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java
  9. 9
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java
  10. 4
      dolphinscheduler-common/src/main/resources/common.properties
  11. 21
      dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java
  12. 2
      dolphinscheduler-master/src/main/resources/logback-spring.xml
  13. 1
      dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml
  14. 1
      dolphinscheduler-worker/src/main/resources/logback-spring.xml

11
docs/docs/en/guide/remote-logging.md

@ -50,3 +50,14 @@ remote.logging.s3.endpoint=<endpoint>
remote.logging.s3.region=<region>
```
## Writing task logs to [Google Cloud Storage (GCS)](https://cloud.google.com/storage)
Configure `common.properties` as follows:
```properties
# the location of the google cloud credential, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.credential=/path/to/credential
# gcs bucket name, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.bucket.name=<your-bucket>
```

13
docs/docs/zh/guide/remote-logging.md

@ -10,7 +10,7 @@ Apache DolphinScheduler支持将任务日志传输到远端存储上。当配置
```properties
# 是否开启远程日志存储
remote.logging.enable=true
# 任务日志写入的远端存储,目前支持OSS, S3
# 任务日志写入的远端存储,目前支持OSS, S3, GCS
remote.logging.target=OSS
# 任务日志在远端存储上的目录
remote.logging.base.dir=logs
@ -50,3 +50,14 @@ remote.logging.s3.endpoint=<endpoint>
remote.logging.s3.region=<region>
```
## 将任务日志写入[Google Cloud Storage (GCS)](https://cloud.google.com/storage)
配置`common.propertis`如下:
```properties
# the location of the google cloud credential, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.credential=/path/to/credential
# gcs bucket name, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.bucket.name=<your-bucket>
```

1
dolphinscheduler-api/src/main/resources/logback-spring.xml

@ -18,6 +18,7 @@
<configuration scan="true" scanPeriod="120 seconds">
<property name="log.base" value="logs"/>
<property scope="context" name="log.base.ctx" value="${log.base}" />
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>

28
dolphinscheduler-common/pom.xml

@ -129,5 +129,33 @@
<version>6.1.26</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<optional>true</optional>
<exclusions>
<exclusion>
<groupId>org.codehaus.mojo</groupId>
<artifactId>animal-sniffer-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>io.perfmark</groupId>
<artifactId>perfmark-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.api.grpc</groupId>
<artifactId>proto-google-common-protos</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java-util</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>

7
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java

@ -839,4 +839,11 @@ public final class Constants {
public static final String REMOTE_LOGGING_S3_ENDPOINT = "remote.logging.s3.endpoint";
public static final String REMOTE_LOGGING_S3_REGION = "remote.logging.s3.region";
/**
* remote logging for GCS
*/
public static final String REMOTE_LOGGING_GCS_CREDENTIAL = "remote.logging.google.cloud.storage.credential";
public static final String REMOTE_LOGGING_GCS_BUCKET_NAME = "remote.logging.google.cloud.storage.bucket.name";
}

153
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java

@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.common.log.remote;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import lombok.extern.slf4j.Slf4j;
import com.google.auth.oauth2.ServiceAccountCredentials;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
@Slf4j
public class GcsRemoteLogHandler implements RemoteLogHandler, Closeable {
private Storage gcsStorage;
private String bucketName;
private String credential;
private static GcsRemoteLogHandler instance;
private GcsRemoteLogHandler() {
}
public static synchronized GcsRemoteLogHandler getInstance() {
if (instance == null) {
instance = new GcsRemoteLogHandler();
instance.init();
}
return instance;
}
public void init() {
try {
credential = readCredentials();
bucketName = readBucketName();
gcsStorage = buildGcsStorage(credential);
checkBucketNameExists(bucketName);
} catch (IOException e) {
log.error("GCS Remote Log Handler init failed", e);
}
}
@Override
public void close() throws IOException {
try {
if (gcsStorage != null) {
gcsStorage.close();
}
} catch (Exception e) {
throw new IOException(e);
}
}
@Override
public void sendRemoteLog(String logPath) {
String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
try {
log.info("send remote log {} to GCS {}", logPath, objectName);
BlobInfo blobInfo = BlobInfo.newBuilder(
BlobId.of(bucketName, objectName)).build();
gcsStorage.create(blobInfo, Files.readAllBytes(Paths.get(logPath)));
} catch (Exception e) {
log.error("error while sending remote log {} to GCS {}", logPath, objectName, e);
}
}
@Override
public void getRemoteLog(String logPath) {
String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
try {
log.info("get remote log on GCS {} to {}", objectName, logPath);
Blob blob = gcsStorage.get(BlobId.of(bucketName, objectName));
blob.downloadTo(Paths.get(logPath));
} catch (Exception e) {
log.error("error while getting remote log on GCS {} to {}", objectName, logPath, e);
}
}
protected Storage buildGcsStorage(String credential) throws IOException {
return StorageOptions.newBuilder()
.setCredentials(ServiceAccountCredentials.fromStream(
Files.newInputStream(Paths.get(credential))))
.build()
.getService();
}
protected String readCredentials() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_CREDENTIAL);
}
protected String readBucketName() {
return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME);
}
public void checkBucketNameExists(String bucketName) {
if (StringUtils.isBlank(bucketName)) {
throw new IllegalArgumentException(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME + " is blank");
}
boolean exist = false;
for (Bucket bucket : gcsStorage.list().iterateAll()) {
if (bucketName.equals(bucket.getName())) {
exist = true;
break;
}
}
if (!exist) {
log.error(
"bucketName: {} does not exist, you need to create them by yourself", bucketName);
} else {
log.info("bucketName: {} has been found", bucketName);
}
}
}

2
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java

@ -37,6 +37,8 @@ public class RemoteLogHandlerFactory {
return OssRemoteLogHandler.getInstance();
} else if ("S3".equals(target)) {
return S3RemoteLogHandler.getInstance();
} else if ("GCS".equals(target)) {
return GcsRemoteLogHandler.getInstance();
}
log.error("No suitable remote logging target for {}", target);

17
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java

@ -17,6 +17,8 @@
package org.apache.dolphinscheduler.common.log.remote;
import static org.apache.dolphinscheduler.common.utils.LogUtils.getLocalLogBaseDir;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
@ -36,8 +38,6 @@ public class RemoteLogUtils {
private static RemoteLogService remoteLogService;
private static final int OBJECT_NAME_COUNT = 2;
@Autowired
private RemoteLogService autowiredRemoteLogService;
@ -79,16 +79,13 @@ public class RemoteLogUtils {
}
public static String getObjectNameFromLogPath(String logPath) {
Path localLogBaseDirPath = Paths.get(getLocalLogBaseDir()).toAbsolutePath();
Path path = Paths.get(logPath);
int nameCount = path.getNameCount();
String logBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR);
if (nameCount < OBJECT_NAME_COUNT) {
return Paths.get(logBaseDir, logPath).toString();
} else {
return Paths.get(logBaseDir, path.subpath(nameCount - OBJECT_NAME_COUNT, nameCount).toString())
.toString();
}
String remoteLogBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR);
return Paths.get(remoteLogBaseDir, path.subpath(localLogBaseDirPath.getNameCount(), nameCount).toString())
.toString();
}
}

9
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java

@ -35,6 +35,10 @@ import java.util.stream.Stream;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.LoggerFactory;
import ch.qos.logback.classic.LoggerContext;
@Slf4j
public class LogUtils {
@ -160,4 +164,9 @@ public class LogUtils {
return builder.toString();
}
public static String getLocalLogBaseDir() {
LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
return loggerContext.getProperty("log.base.ctx");
}
}

4
dolphinscheduler-common/src/main/resources/common.properties

@ -173,4 +173,8 @@ remote.logging.s3.bucket.name=<bucket.name>
remote.logging.s3.endpoint=<endpoint>
# s3 region, required if you set remote.logging.target=S3
remote.logging.s3.region=<region>
# the location of the google cloud credential, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.credential=/path/to/credential
# gcs bucket name, required if you set remote.logging.target=GCS
remote.logging.google.cloud.storage.bucket.name=<your-bucket>

21
dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java

@ -18,8 +18,14 @@
package org.apache.dolphinscheduler.common.log.remote;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.utils.LogUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import java.nio.file.Path;
import java.nio.file.Paths;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@ -27,20 +33,27 @@ import org.mockito.MockedStatic;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
@Slf4j
@ExtendWith(MockitoExtension.class)
public class RemoteLogHandlerTest {
@Test
public void testGetObjectNameFromLogPath() {
final String logPath = "/path/to/dolphinscheduler/logs/20230116/8245922982496_1-1-3.log";
final String expectedObjectName = "logs/20230116/8245922982496_1-1-3.log";
Path currentRelativePath = Paths.get("");
String currentDir = currentRelativePath.toAbsolutePath().toString();
final String logPath = currentDir + "/logs/20230116/8245922982496/1/1/1.log";
log.info("logPath is: {}", logPath);
try (MockedStatic<PropertyUtils> propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class)) {
final String expectedObjectName = "logs/20230116/8245922982496/1/1/1.log";
try (
MockedStatic<PropertyUtils> propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class);
MockedStatic<LogUtils> remoteLogUtilsMockedStatic = Mockito.mockStatic(LogUtils.class)) {
propertyUtilsMockedStatic.when(() -> PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR))
.thenReturn("logs");
remoteLogUtilsMockedStatic.when(LogUtils::getLocalLogBaseDir).thenReturn("logs");
String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
Assertions.assertEquals(expectedObjectName, objectName);
}
}

2
dolphinscheduler-master/src/main/resources/logback-spring.xml

@ -18,6 +18,8 @@
<configuration scan="true" scanPeriod="120 seconds">
<property name="log.base" value="logs"/>
<property scope="context" name="log.base.ctx" value="${log.base}" />
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>

1
dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml

@ -18,6 +18,7 @@
<configuration scan="true" scanPeriod="120 seconds">
<property name="log.base" value="logs"/>
<property scope="context" name="log.base.ctx" value="${log.base}" />
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>

1
dolphinscheduler-worker/src/main/resources/logback-spring.xml

@ -18,6 +18,7 @@
<configuration scan="true" scanPeriod="120 seconds">
<property name="log.base" value="logs"/>
<property scope="context" name="log.base.ctx" value="${log.base}" />
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>

Loading…
Cancel
Save