Browse Source

[Improvement-13751][Worker] Support real-time pod log collection (#13752)

* Support real-time pod log collection

* datax task test have to mock ShellCommandExecutor with whole dependencies

* replace \n with EMPTY_STRING

* modify error log msg

* Update dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java

Co-authored-by: caishunfeng <caishunfeng2021@gmail.com>

* throw exception in pod log collection thread

---------

Co-authored-by: caishunfeng <caishunfeng2021@gmail.com>
dependabot/maven/dolphinscheduler-bom/org.apache.spark-spark-core_2.12-3.4.0
Aaron Wang 1 year ago committed by GitHub
parent
commit
d6fcf60534
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 73
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java
  2. 19
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/am/KubernetesApplicationManager.java
  3. 5
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java
  4. 4
      dolphinscheduler-task-plugin/dolphinscheduler-task-datax/pom.xml
  5. 12
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java

73
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java

@ -17,6 +17,8 @@
package org.apache.dolphinscheduler.plugin.task.api;
import static org.apache.dolphinscheduler.common.constants.Constants.EMPTY_STRING;
import static org.apache.dolphinscheduler.common.constants.Constants.SLEEP_TIME_MILLIS;
import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_FAILURE;
import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_KILL;
@ -40,6 +42,7 @@ import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@ -54,6 +57,7 @@ import java.util.regex.Pattern;
import org.slf4j.Logger;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.fabric8.kubernetes.client.dsl.LogWatch;
/**
* abstract command executor
@ -86,7 +90,9 @@ public abstract class AbstractCommandExecutor {
*/
protected LinkedBlockingQueue<String> logBuffer;
protected boolean logOutputIsSuccess = false;
protected boolean processLogOutputIsSuccess = false;
protected boolean podLogOutputIsFinished = false;
/*
* SHELL result string
@ -100,6 +106,8 @@ public abstract class AbstractCommandExecutor {
protected Future<?> taskOutputFuture;
protected Future<?> podLogOutputFuture;
public AbstractCommandExecutor(Consumer<LinkedBlockingQueue<String>> logHandler,
TaskExecutionContext taskRequest,
Logger logger) {
@ -107,6 +115,7 @@ public abstract class AbstractCommandExecutor {
this.taskRequest = taskRequest;
this.logger = logger;
this.logBuffer = new LinkedBlockingQueue<>();
this.logBuffer.add(EMPTY_STRING);
if (this.taskRequest != null) {
// set logBufferEnable=true if the task uses logHandler and logBuffer to buffer log messages
@ -215,6 +224,9 @@ public abstract class AbstractCommandExecutor {
// parse process output
parseProcessOutput(process);
// collect pod log
collectPodLogIfNeeded();
int processId = getProcessId(process);
result.setProcessId(processId);
@ -251,6 +263,17 @@ public abstract class AbstractCommandExecutor {
}
}
if (podLogOutputFuture != null) {
try {
// Wait kubernetes pod log collection finished
podLogOutputFuture.get();
// delete pod after successful execution and log collection
ProcessUtils.cancelApplication(taskRequest);
} catch (ExecutionException e) {
logger.error("Handle pod log error", e);
}
}
TaskExecutionStatus kubernetesStatus =
ProcessUtils.getApplicationStatus(taskRequest.getK8sTaskExecutionContext(), taskRequest.getTaskAppId());
@ -296,6 +319,45 @@ public abstract class AbstractCommandExecutor {
logger.info("task run command: {}", String.join(" ", commands));
}
private void collectPodLogIfNeeded() {
if (null == taskRequest.getK8sTaskExecutionContext()) {
podLogOutputIsFinished = true;
return;
}
// wait for launching (driver) pod
ThreadUtils.sleep(SLEEP_TIME_MILLIS * 5L);
LogWatch watcher =
ProcessUtils.getPodLogWatcher(taskRequest.getK8sTaskExecutionContext(), taskRequest.getTaskAppId());
if (watcher != null) {
ExecutorService collectPodLogExecutorService = ThreadUtils
.newSingleDaemonScheduledExecutorService("CollectPodLogOutput-thread-" + taskRequest.getTaskName());
podLogOutputFuture = collectPodLogExecutorService.submit(() -> {
try {
String line;
try (BufferedReader reader = new BufferedReader(new InputStreamReader(watcher.getOutput()))) {
while ((line = reader.readLine()) != null) {
logBuffer.add(String.format("[K8S-pod-log-%s]: %s", taskRequest.getTaskName(), line));
}
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
watcher.close();
podLogOutputIsFinished = true;
}
});
collectPodLogExecutorService.shutdown();
} else {
CompletableFuture<String> exceptionalFuture = new CompletableFuture<>();
exceptionalFuture.completeExceptionally(new RuntimeException("The driver pod does not exist."));
podLogOutputFuture = exceptionalFuture;
podLogOutputIsFinished = true;
}
}
private void parseProcessOutput(Process process) {
// todo: remove this this thread pool.
ExecutorService getOutputLogService = ThreadUtils
@ -315,10 +377,10 @@ public abstract class AbstractCommandExecutor {
taskResultString = line;
}
}
logOutputIsSuccess = true;
processLogOutputIsSuccess = true;
} catch (Exception e) {
logger.error("Parse var pool error", e);
logOutputIsSuccess = true;
processLogOutputIsSuccess = true;
}
});
@ -330,10 +392,11 @@ public abstract class AbstractCommandExecutor {
try (
final LogUtils.MDCAutoClosableContext mdcAutoClosableContext =
LogUtils.setTaskInstanceLogFullPathMDC(taskRequest.getLogPath());) {
while (!logBuffer.isEmpty() || !logOutputIsSuccess) {
if (!logBuffer.isEmpty()) {
while (logBuffer.size() > 1 || !processLogOutputIsSuccess || !podLogOutputIsFinished) {
if (logBuffer.size() > 1) {
logHandler.accept(logBuffer);
logBuffer.clear();
logBuffer.add(EMPTY_STRING);
} else {
Thread.sleep(TaskConstants.DEFAULT_LOG_FLUSH_INTERVAL);
}

19
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/am/KubernetesApplicationManager.java

@ -41,6 +41,7 @@ import io.fabric8.kubernetes.client.Config;
import io.fabric8.kubernetes.client.DefaultKubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.dsl.FilterWatchListDeletable;
import io.fabric8.kubernetes.client.dsl.LogWatch;
@Slf4j
@AutoService(ApplicationManager.class)
@ -170,27 +171,23 @@ public class KubernetesApplicationManager implements ApplicationManager {
}
/**
* collect pod's log
* get pod's log watcher
*
* @param kubernetesApplicationManagerContext
* @return
*/
public String collectPodLog(KubernetesApplicationManagerContext kubernetesApplicationManagerContext) {
public LogWatch getPodLogWatcher(KubernetesApplicationManagerContext kubernetesApplicationManagerContext) {
KubernetesClient client = getClient(kubernetesApplicationManagerContext);
FilterWatchListDeletable<Pod, PodList> watchList = getDriverPod(kubernetesApplicationManagerContext);
List<Pod> driverPod = watchList.list().getItems();
if (CollectionUtils.isEmpty(driverPod)) {
return "The driver pod does not exist.";
return null;
}
Pod driver = driverPod.get(0);
String driverPodName = driver.getMetadata().getName();
String logs = client.pods()
.inNamespace(kubernetesApplicationManagerContext.getK8sTaskExecutionContext().getNamespace())
.withName(driverPodName).getLog();
// delete driver pod only after successful execution
killApplication(kubernetesApplicationManagerContext);
return logs;
return client.pods().inNamespace(driver.getMetadata().getNamespace())
.withName(driver.getMetadata().getName())
.watchLog();
}
}

5
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java

@ -49,6 +49,7 @@ import java.util.regex.Pattern;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import io.fabric8.kubernetes.client.dsl.LogWatch;
@Slf4j
public final class ProcessUtils {
@ -204,12 +205,12 @@ public final class ProcessUtils {
* @param taskAppId
* @return
*/
public static String getPodLog(K8sTaskExecutionContext k8sTaskExecutionContext, String taskAppId) {
public static LogWatch getPodLogWatcher(K8sTaskExecutionContext k8sTaskExecutionContext, String taskAppId) {
KubernetesApplicationManager applicationManager =
(KubernetesApplicationManager) applicationManagerMap.get(ResourceManagerType.KUBERNETES);
return applicationManager
.collectPodLog(new KubernetesApplicationManagerContext(k8sTaskExecutionContext, taskAppId));
.getPodLogWatcher(new KubernetesApplicationManagerContext(k8sTaskExecutionContext, taskAppId));
}
}

4
dolphinscheduler-task-plugin/dolphinscheduler-task-datax/pom.xml

@ -54,5 +54,9 @@
<artifactId>druid</artifactId>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
</dependency>
</dependencies>
</project>

12
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java

@ -293,27 +293,17 @@ public abstract class WorkerTaskExecuteRunnable implements Runnable {
}
}
protected void writePodLodIfNeeded() {
if (null == taskExecutionContext.getK8sTaskExecutionContext()) {
return;
}
log.info("The current task is k8s task, begin to write pod log");
ProcessUtils.getPodLog(taskExecutionContext.getK8sTaskExecutionContext(), taskExecutionContext.getTaskAppId());
}
protected void closeLogAppender() {
try {
writePodLodIfNeeded();
if (RemoteLogUtils.isRemoteLoggingEnable()) {
RemoteLogUtils.sendRemoteLog(taskExecutionContext.getLogPath());
log.info("Log handler sends task log {} to remote storage asynchronously.",
taskExecutionContext.getLogPath());
}
} catch (Exception ex) {
log.error("Write k8s pod log failed", ex);
log.error("Send remote log failed", ex);
} finally {
log.info(FINALIZE_SESSION_MARKER, FINALIZE_SESSION_MARKER.toString());
}
}

Loading…
Cancel
Save