diff --git a/docs/docs/en/guide/metrics/metrics.md b/docs/docs/en/guide/metrics/metrics.md index 9ecdc3619f..1b5d099baf 100644 --- a/docs/docs/en/guide/metrics/metrics.md +++ b/docs/docs/en/guide/metrics/metrics.md @@ -60,14 +60,15 @@ For example, you can get the master metrics by `curl http://localhost:5679/actua ### Task Related Metrics -- ds.task.timeout.count: (counter) the number of timeout tasks -- ds.task.finish.count: (counter) the number of finished tasks, both succeeded and failed included -- ds.task.success.count: (counter) the number of successful tasks -- ds.task.failure.count: (counter) the number of failed tasks -- ds.task.stop.count: (counter) the number of stopped tasks -- ds.task.retry.count: (counter) the number of retried tasks -- ds.task.submit.count: (counter) the number of submitted tasks -- ds.task.failover.count: (counter) the number of task fail-overs +- ds.task.instance.count: (counter) the number of task instances, sliced by the tag `state`: + - timeout: the number of timeout tasks + - finish: the number of finished tasks, both succeeded and failed included + - success: the number of successful tasks + - fail: the number of failed tasks + - stop: the number of stopped tasks + - retry: the number of retried tasks + - submit: the number of submitted tasks + - failover: the number of task fail-overs - ds.task.dispatch.count: (counter) the number of tasks dispatched to worker - ds.task.dispatch.failure.count: (counter) the number of tasks failed to dispatch, retry failure included - ds.task.dispatch.error.count: (counter) the number of task dispatch errors @@ -83,12 +84,13 @@ For example, you can get the master metrics by `curl http://localhost:5679/actua - ds.workflow.create.command.count: (counter) the number of commands created and inserted by workflows - ds.workflow.instance.submit.count: (counter) the number of submitted workflow instances - ds.workflow.instance.running: (gauge) the number of running workflow instances -- 
ds.workflow.instance.timeout.count: (counter) the number of timeout workflow instances -- ds.workflow.instance.finish.count: (counter) indicates the number of finished workflow instances, both successes and failures included -- ds.workflow.instance.success.count: (counter) the number of successful workflow instances -- ds.workflow.instance.failure.count: (counter) the number of failed workflow instances -- ds.workflow.instance.stop.count: (counter) the number of stopped workflow instances -- ds.workflow.instance.failover.count: (counter) the number of workflow instance fail-overs +- ds.workflow.instance.count: (counter) the number of workflow instances, sliced by the tag `state`: + - timeout: the number of timeout workflow instances + - finish: the number of finished workflow instances, both successes and failures included + - success: the number of successful workflow instances + - fail: the number of failed workflow instances + - stop: the number of stopped workflow instances + - failover: the number of workflow instance fail-overs ### Master Server Metrics diff --git a/docs/docs/zh/guide/metrics/metrics.md b/docs/docs/zh/guide/metrics/metrics.md index 3116f5445b..9101935cc0 100644 --- a/docs/docs/zh/guide/metrics/metrics.md +++ b/docs/docs/zh/guide/metrics/metrics.md @@ -61,14 +61,15 @@ metrics exporter端口`server.port`是在application.yaml里定义的: master: ` ### 任务相关指标 -- ds.task.timeout.count: (counter) 超时的任务数量 -- ds.task.finish.count: (counter) 完成的任务数量,成功和失败的任务都算在内 -- ds.task.success.count: (counter) 成功完成的任务数量 -- ds.task.failure.count: (counter) 失败的任务数量 -- ds.task.stop.count: (counter) 暂停的任务数量 -- ds.task.retry.count: (counter) 重试的任务数量 -- ds.task.submit.count: (counter) 已提交的任务数量 -- ds.task.failover.count: (counter) 容错的任务数量 +- ds.task.instance.count: (counter) 任务实例数量,由tag `state`按状态切分: + - timeout:超时的任务数量 + - finish:完成的任务数量,成功和失败的任务都算在内 + - success:成功完成的任务数量 + - fail:失败的任务数量 + - stop:停止的任务数量 + - retry:重试的任务数量 + - submit:已提交的任务数量 + - failover:容错的任务数量 - 
ds.task.dispatch.count: (counter) 分发到worker上的任务数量 - ds.task.dispatch.failure.count: (counter) 分发失败的任务数量,重试也包含在内 - ds.task.dispatch.error.count: (counter) 分发任务的错误数量 @@ -82,14 +83,15 @@ metrics exporter端口`server.port`是在application.yaml里定义的: master: ` ### 工作流相关指标 - ds.workflow.create.command.count: (counter) 工作量创建并插入的命令数量 -- ds.workflow.instance.submit.count: (counter) 已提交的工作量实例数量 - ds.workflow.instance.running: (gauge) 正在运行的工作流实例数量 -- ds.workflow.instance.timeout.count: (counter) 运行超时的工作流实例数量 -- ds.workflow.instance.finish.count: (counter) 已完成的工作流实例数量,包含成功和失败 -- ds.workflow.instance.success.count: (counter) 运行成功的工作流实例数量 -- ds.workflow.instance.failure.count: (counter) 运行失败的工作流实例数量 -- ds.workflow.instance.stop.count: (counter) 停止的工作流实例数量 -- ds.workflow.instance.failover.count: (counter) 容错的工作流实例数量 +- ds.workflow.instance.count: (counter) 工作流实例数量,由tag `state`按状态切分: + - submit:已提交的工作流实例数量 + - timeout:运行超时的工作流实例数量 + - finish:已完成的工作流实例数量,包含成功和失败 + - success:运行成功的工作流实例数量 + - fail:运行失败的工作流实例数量 + - stop:停止的工作流实例数量 + - failover:容错的工作流实例数量 ### Master Server指标 diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java index ee8168856a..f6f7069ab3 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java @@ -31,7 +31,7 @@ public class TaskRetryStateEventHandler implements StateEventHandler { @Override public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) throws StateEventHandleException { - TaskMetrics.incTaskRetry(); + TaskMetrics.incTaskInstanceByState("retry"); Map waitToRetryTaskInstanceMap = 
workflowExecuteRunnable.getWaitToRetryTaskInstanceMap(); TaskInstance taskInstance = waitToRetryTaskInstanceMap.get(stateEvent.getTaskCode()); workflowExecuteRunnable.addTaskToStandByList(taskInstance); diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java index e3ad268f97..c0cf864d31 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java @@ -96,17 +96,17 @@ public class TaskStateEventHandler implements StateEventHandler { return; } if (taskStateEvent.getExecutionStatus().typeIsFinished()) { - TaskMetrics.incTaskFinish(); + TaskMetrics.incTaskInstanceByState("finish"); } switch (taskStateEvent.getExecutionStatus()) { case STOP: - TaskMetrics.incTaskStop(); + TaskMetrics.incTaskInstanceByState("stop"); break; case SUCCESS: - TaskMetrics.incTaskSuccess(); + TaskMetrics.incTaskInstanceByState("success"); break; case FAILURE: - TaskMetrics.incTaskFailure(); + TaskMetrics.incTaskInstanceByState("fail"); break; default: break; diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java index 240f10ff2c..c43c0bcbf2 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java @@ -35,7 +35,7 @@ public class TaskTimeoutStateEventHandler implements StateEventHandler { @Override public boolean 
handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) throws StateEventHandleError { - TaskMetrics.incTaskTimeout(); + TaskMetrics.incTaskInstanceByState("timeout"); workflowExecuteRunnable.checkTaskInstanceByStateEvent(stateEvent); TaskInstance taskInstance = workflowExecuteRunnable.getTaskInstance(stateEvent.getTaskInstanceId()).get(); diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java index b4d9fc1f85..c598cb5a90 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java @@ -59,8 +59,7 @@ public class WorkflowStartEventHandler implements WorkflowEventHandler { "The workflow start event is invalid, cannot find the workflow instance from cache"); } ProcessInstance processInstance = workflowExecuteRunnable.getProcessInstance(); - - ProcessInstanceMetrics.incProcessInstanceSubmit(); + ProcessInstanceMetrics.incProcessInstanceByState("submit"); CompletableFuture workflowSubmitFuture = CompletableFuture.supplyAsync(workflowExecuteRunnable::call, workflowExecuteThreadPool); workflowSubmitFuture.thenAccept(workflowSubmitStatue -> { diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java index 3abdd879bb..a37b3023a3 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java +++ 
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java @@ -75,17 +75,17 @@ public class WorkflowStateEventHandler implements StateEventHandler { private void measureProcessState(StateEvent processStateEvent) { if (processStateEvent.getExecutionStatus().typeIsFinished()) { - ProcessInstanceMetrics.incProcessInstanceFinish(); + ProcessInstanceMetrics.incProcessInstanceByState("finish"); } switch (processStateEvent.getExecutionStatus()) { case STOP: - ProcessInstanceMetrics.incProcessInstanceStop(); + ProcessInstanceMetrics.incProcessInstanceByState("stop"); break; case SUCCESS: - ProcessInstanceMetrics.incProcessInstanceSuccess(); + ProcessInstanceMetrics.incProcessInstanceByState("success"); break; case FAILURE: - ProcessInstanceMetrics.incProcessInstanceFailure(); + ProcessInstanceMetrics.incProcessInstanceByState("fail"); break; default: break; diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java index c2fc873bdc..b04866a76a 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java @@ -27,7 +27,7 @@ import com.google.auto.service.AutoService; public class WorkflowTimeoutStateEventHandler implements StateEventHandler { @Override public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) { - ProcessInstanceMetrics.incProcessInstanceTimeout(); + ProcessInstanceMetrics.incProcessInstanceByState("timeout"); workflowExecuteRunnable.processTimeout(); return true; } diff --git 
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java index 8edf3f0c86..4cd8b3715a 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java @@ -17,9 +17,14 @@ package org.apache.dolphinscheduler.server.master.metrics; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; +import com.google.common.collect.ImmutableSet; + import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; @@ -31,6 +36,24 @@ public final class ProcessInstanceMetrics { throw new UnsupportedOperationException("Utility class"); } + private static Map PROCESS_INSTANCE_COUNTERS = new HashMap<>(); + + private static final Set PROCESS_INSTANCE_STATES = ImmutableSet.of( + "submit", "timeout", "finish", "failover", "success", "fail", "stop"); + + static { + for (final String state : PROCESS_INSTANCE_STATES) { + PROCESS_INSTANCE_COUNTERS.put( + state, + Counter.builder("ds.workflow.instance.count") + .tag("state", state) + .description(String.format("Process instance %s total count", state)) + .register(Metrics.globalRegistry) + ); + } + + } + private static final Timer COMMAND_QUERY_TIMETER = Timer.builder("ds.workflow.command.query.duration") .description("Command query duration") @@ -41,41 +64,6 @@ public final class ProcessInstanceMetrics { .description("Process instance generated duration") .register(Metrics.globalRegistry); - private static final Counter PROCESS_INSTANCE_SUBMIT_COUNTER = - Counter.builder("ds.workflow.instance.submit.count") - 
.description("Process instance submit total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_TIMEOUT_COUNTER = - Counter.builder("ds.workflow.instance.timeout.count") - .description("Process instance timeout total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_FINISH_COUNTER = - Counter.builder("ds.workflow.instance.finish.count") - .description("Process instance finish total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_SUCCESS_COUNTER = - Counter.builder("ds.workflow.instance.success.count") - .description("Process instance success total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_FAILURE_COUNTER = - Counter.builder("ds.workflow.instance.failure.count") - .description("Process instance failure total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_STOP_COUNTER = - Counter.builder("ds.workflow.instance.stop.count") - .description("Process instance stop total count") - .register(Metrics.globalRegistry); - - private static final Counter PROCESS_INSTANCE_FAILOVER_COUNTER = - Counter.builder("ds.workflow.instance.failover.count") - .description("Process instance failover total count") - .register(Metrics.globalRegistry); - public static void recordCommandQueryTime(long milliseconds) { COMMAND_QUERY_TIMETER.record(milliseconds, TimeUnit.MILLISECONDS); } @@ -96,31 +84,8 @@ public final class ProcessInstanceMetrics { .register(Metrics.globalRegistry); } - public static void incProcessInstanceSubmit() { - PROCESS_INSTANCE_SUBMIT_COUNTER.increment(); - } - - public static void incProcessInstanceTimeout() { - PROCESS_INSTANCE_TIMEOUT_COUNTER.increment(); - } - - public static void incProcessInstanceFinish() { - PROCESS_INSTANCE_FINISH_COUNTER.increment(); + public static void incProcessInstanceByState(final String state) { + 
PROCESS_INSTANCE_COUNTERS.get(state).increment(); } - public static void incProcessInstanceSuccess() { - PROCESS_INSTANCE_SUCCESS_COUNTER.increment(); - } - - public static void incProcessInstanceFailure() { - PROCESS_INSTANCE_FAILURE_COUNTER.increment(); - } - - public static void incProcessInstanceStop() { - PROCESS_INSTANCE_STOP_COUNTER.increment(); - } - - public static void incProcessInstanceFailover() { - PROCESS_INSTANCE_FAILOVER_COUNTER.increment(); - } } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java index 3f2ed1544f..5b20c59bac 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java @@ -17,8 +17,12 @@ package org.apache.dolphinscheduler.server.master.metrics; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; import java.util.function.Supplier; +import com.facebook.presto.jdbc.internal.guava.collect.ImmutableSet; import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; @@ -29,45 +33,24 @@ public final class TaskMetrics { throw new UnsupportedOperationException("Utility class"); } - private static final Counter TASK_SUBMIT_COUNTER = - Counter.builder("ds.task.submit.count") - .description("Task submit total count") - .register(Metrics.globalRegistry); - - private static final Counter TASK_FINISH_COUNTER = - Counter.builder("ds.task.finish.count") - .description("Task finish total count") - .register(Metrics.globalRegistry); - - private static final Counter TASK_SUCCESS_COUNTER = - Counter.builder("ds.task.success.count") - .description("Task success total count") - .register(Metrics.globalRegistry); - private 
static final Counter TASK_FAILURE_COUNTER = - Counter.builder("ds.task.failure.count") - .description("Task failure total count") - .register(Metrics.globalRegistry); + private static Map TASK_INSTANCE_COUNTERS = new HashMap<>(); - private static final Counter TASK_TIMEOUT_COUNTER = - Counter.builder("ds.task.timeout.count") - .description("Task timeout total count") - .register(Metrics.globalRegistry); + private static final Set TASK_INSTANCE_STATES = ImmutableSet.of( + "submit", "timeout", "finish", "failover", "retry", "dispatch", "success", "fail", "stop"); - private static final Counter TASK_RETRY_COUNTER = - Counter.builder("ds.task.retry.count") - .description("Task retry total count") - .register(Metrics.globalRegistry); + static { + for (final String state : TASK_INSTANCE_STATES) { + TASK_INSTANCE_COUNTERS.put( + state, + Counter.builder("ds.task.instance.count") + .tags("state", state) + .description(String.format("Process instance %s total count", state)) + .register(Metrics.globalRegistry) + ); + } - private static final Counter TASK_STOP_COUNTER = - Counter.builder("ds.task.stop.count") - .description("Task stop total count") - .register(Metrics.globalRegistry); - - private static final Counter TASK_FAILOVER_COUNTER = - Counter.builder("ds.task.failover.count") - .description("Task failover total count") - .register(Metrics.globalRegistry); + } private static final Counter TASK_DISPATCH_COUNTER = Counter.builder("ds.task.dispatch.count") @@ -76,52 +59,20 @@ public final class TaskMetrics { private static final Counter TASK_DISPATCHER_FAILED = Counter.builder("ds.task.dispatch.failure.count") - .description("Task dispatch failed count") + .description("Task dispatch failures count, retried ones included") .register(Metrics.globalRegistry); private static final Counter TASK_DISPATCH_ERROR = Counter.builder("ds.task.dispatch.error.count") - .description("Task dispatch error") + .description("Number of errors during task dispatch") 
.register(Metrics.globalRegistry); - public static void incTaskSubmit() { - TASK_SUBMIT_COUNTER.increment(); - } - public synchronized static void registerTaskPrepared(Supplier consumer) { Gauge.builder("ds.task.prepared", consumer) .description("Task prepared count") .register(Metrics.globalRegistry); } - public static void incTaskFinish() { - TASK_FINISH_COUNTER.increment(); - } - - public static void incTaskSuccess() { - TASK_SUCCESS_COUNTER.increment(); - } - - public static void incTaskFailure() { - TASK_FAILURE_COUNTER.increment(); - } - - public static void incTaskTimeout() { - TASK_TIMEOUT_COUNTER.increment(); - } - - public static void incTaskRetry() { - TASK_RETRY_COUNTER.increment(); - } - - public static void incTaskStop() { - TASK_STOP_COUNTER.increment(); - } - - public static void incTaskFailover() { - TASK_FAILOVER_COUNTER.increment(); - } - public static void incTaskDispatchFailed(int failedCount) { TASK_DISPATCHER_FAILED.increment(failedCount); } @@ -134,4 +85,8 @@ public final class TaskMetrics { TASK_DISPATCH_COUNTER.increment(); } + public static void incTaskInstanceByState(final String state) { + TASK_INSTANCE_COUNTERS.get(state).increment(); + } + } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java index 3de9299d22..27f90d3d00 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java @@ -1673,7 +1673,7 @@ public class WorkflowExecuteRunnable implements Callable { taskInstance.getName(), taskInstance.getId(), taskInstance.getTaskCode()); - TaskMetrics.incTaskSubmit(); + TaskMetrics.incTaskInstanceByState("submit"); readyToSubmitTaskQueue.put(taskInstance); } 
diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java index efe33de5ff..f89c872784 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java @@ -169,7 +169,7 @@ public class MasterFailoverService { } } - ProcessInstanceMetrics.incProcessInstanceFailover(); + ProcessInstanceMetrics.incProcessInstanceByState("failover"); //updateProcessInstance host is null to mark this processInstance has been failover // and insert a failover command processInstance.setHost(Constants.NULL); @@ -211,7 +211,7 @@ public class MasterFailoverService { * @param taskInstance */ private void failoverTaskInstance(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) { - TaskMetrics.incTaskFailover(); + TaskMetrics.incTaskInstanceByState("failover"); boolean isMasterTask = TaskProcessorFactory.isMasterTask(taskInstance.getTaskType()); taskInstance.setProcessInstance(processInstance); diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java index 9a4578d8e6..d817e67fe2 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java @@ -156,8 +156,7 @@ public class WorkerFailoverService { * @param taskInstance */ private void failoverTaskInstance(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) { - - 
TaskMetrics.incTaskFailover(); + TaskMetrics.incTaskInstanceByState("failover"); boolean isMasterTask = TaskProcessorFactory.isMasterTask(taskInstance.getTaskType()); taskInstance.setProcessInstance(processInstance); diff --git a/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json b/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json index 5461759c8c..aa02dfae6a 100644 --- a/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json +++ b/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json @@ -43,12 +43,11 @@ "fiscalYearStartMonth": 0, "gnetId": 4701, "graphTooltip": 1, - "iteration": 1654674717443, "links": [], "liveNow": false, "panels": [ { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -56,1442 +55,1439 @@ "y": 0 }, "id": 164, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "panels": [], + "title": "MasterServer", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 1 - }, - "id": 148, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "increase(ds_master_overload_count_total[1m])", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Master Overload/1m", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 148, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - 
} - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "editorMode": "code", + "expr": "increase(ds_master_overload_count_total[1m])", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Master Overload/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 1 - }, - "id": 150, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "increase(ds_master_consume_command_count_total{}[1m])", - "legendFormat": "master_consume_command", - "range": true, - "refId": "A" - } - ], - "title": "Master Consume Command/1m", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 150, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, + "tooltip": { + "mode": "single", + "sort": "none" 
+ } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 12, - "y": 1 - }, - "id": 168, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "increase(ds_master_consume_command_count_total{}[1m])", + "legendFormat": "master_consume_command", + "range": true, + "refId": "A" + } + ], + "title": "Master Consume Command/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "jvm_threads_live_threads{application=\"master-server\"}", - "legendFormat": "live_thread", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "jvm_threads_daemon_threads{application=\"master-server\"}", - "hide": false, - "legendFormat": "daemon_thread", - 
"range": true, - "refId": "B" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "jvm_threads_peak_threads{application=\"master-server\"}", - "hide": false, - "legendFormat": "peak_thread", - "range": true, - "refId": "C" - } - ], - "title": "JVM Thread", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 1 - }, - "id": 170, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "expr": "jvm_threads_states_threads{application=\"master-server\"}", - "refId": "A" - } - ], - "title": "Thread Status", - "type": "timeseries" - } - ], - "title": "MasterServer", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "datasource", - "uid": "grafana" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, "gridPos": { - "h": 1, - "w": 24, - "x": 0, + "h": 8, + "w": 6, + "x": 12, "y": 1 }, - "id": 126, - "panels": [ + "id": 168, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "JOBS" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 2 - }, - "id": 63, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.5.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(ds_master_quartz_job_executed_total)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "metric": "", - "refId": "A", - "step": 14400 - } - ], - "title": "Job Total Count", - "type": "stat" + "editorMode": "code", + "exemplar": false, + "expr": "jvm_threads_live_threads{application=\"master-server\"}", + "legendFormat": "live_thread", + "range": true, + "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" 
}, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 2 - }, - "id": 144, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "8.5.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(ds_master_quartz_job_executed_total{result=\"success\"}) / sum(ds_master_quartz_job_executed_total) * 100", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "metric": "", - "refId": "A", - "step": 14400 - } - ], - "title": "Job Successful Rate", - "type": "gauge" + "editorMode": "code", + "expr": "jvm_threads_daemon_threads{application=\"master-server\"}", + "hide": false, + "legendFormat": "daemon_thread", + "range": true, + "refId": "B" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 6, - "x": 0, - "y": 10 - }, - "hiddenSeries": false, - "id": 139, - "legend": { - "alignAsTable": false, - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - 
{ - "exemplar": true, - "expr": "sum(ds_master_quartz_job_executed_total{})", - "hide": false, - "interval": "", - "legendFormat": "Total", - "refId": "A" + "editorMode": "code", + "expr": "jvm_threads_peak_threads{application=\"master-server\"}", + "hide": false, + "legendFormat": "peak_thread", + "range": true, + "refId": "C" + } + ], + "title": "JVM Thread", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "exemplar": true, - "expr": "ds_master_quartz_job_executed_total{result=\"success\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Successful", - "refId": "B" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "exemplar": true, - "expr": "ds_master_quartz_job_executed_total{result=\"failure\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Failed ({{exception}})", - "refId": "C" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Quartz Job Executed Count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1516", - "format": "short", - "logBase": 1, - "min": "0", - "show": true + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "$$hashKey": "object:1517", - "format": "short", - "logBase": 1, - "show": true + "thresholdsStyle": { + "mode": "off" } - ], - "yaxis": { - "align": false - } - }, - { - 
"aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 6, - "x": 6, - "y": 10 - }, - "hiddenSeries": false, - "id": 101, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "rate(ds_master_quartz_job_execution_time_seconds_sum[1m])/rate(ds_master_quartz_job_execution_time_seconds_count[1m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg ({{exception}})", - "refId": "A" - }, - { - "exemplar": true, - "expr": "quartz_job_execution_seconds_max", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max ({{exception}})", - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Quartz Job Execution Time", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1671", - "format": "s", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:1672", - "format": "short", - "label": "", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - { - 
"aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 6, - "x": 12, - "y": 10 - }, - "hiddenSeries": false, - "id": 119, - "legend": { - "alignAsTable": false, - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(increase(ds_master_quartz_job_executed_total[1m]))", - "hide": false, - "interval": "", - "legendFormat": "Total", - "refId": "A" - }, - { - "exemplar": true, - "expr": "increase(ds_master_quartz_job_executed_total{result=\"success\"}[1m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Successful", - "refId": "B" - }, - { - "exemplar": true, - "expr": "increase(ds_master_quartz_job_executed_total{result=\"failure\"}[1m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Failed ({{exception}})", - "refId": "C" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Quartz Job Executed Count / Minute", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1516", - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:1517", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "overrides": 
[] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 170, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { - "cards": {}, - "color": { - "cardColor": "#F2495C", - "colorScale": "sqrt", - "colorScheme": "interpolateReds", - "exponent": 0.5, - "mode": "opacity" - }, - "dataFormat": "timeseries", "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "gridPos": { - "h": 9, - "w": 6, - "x": 18, - "y": 10 - }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 146, - "legend": { - "show": true - }, - "pluginVersion": "8.2.3", - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(ds_master_quartz_job_execution_time_seconds_bucket[5m])) by (le))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Quartz Job Execution Time Distribution", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "yAxis": { - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "expr": "jvm_threads_states_threads{application=\"master-server\"}", + "refId": "A" } ], - "title": "Scheduler", - "type": "row" + "title": "Thread Status", + "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 2 + "y": 9 }, - "id": 166, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "id": 126, + "panels": [], + "title": "Scheduler", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": 
{ - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 3 - }, - "id": 152, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + ] }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "increase(ds_workflow_instance_submit_count_total{}[1m])", - "refId": "A" - } - ], - "title": "Process Instance Submit/1m", - "type": "timeseries" + "unit": "JOBS" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - 
"steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 3 - }, - "id": 162, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "increase(ds_workflow_instance_finish_count_total{}[1m])", - "refId": "A" - } + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 63, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" ], - "title": "Process Instance Finish/1m", - "type": "timeseries" + "fields": "", + "values": false }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.5.3", + "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "exemplar": true, + "expr": "sum(ds_master_quartz_job_executed_total)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Job Total Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "color": "green", + "value": 80 } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 11 - }, - "id": 156, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + ] }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "increase(ds_workflow_instance_success_count_total{}[1m])", - "refId": "A" - } + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 144, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" ], - "title": "Process Instance Success /1m", - "type": "timeseries" + "fields": "", + "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.5.3", + "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 11 - }, - "id": 160, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "increase(ds_workflow_instance_stop_count_total{}[1m])", - "refId": "A" - } - ], - "title": "Process Instance Stop/1m", - "type": "timeseries" + "exemplar": true, + "expr": "sum(ds_master_quartz_job_executed_total{result=\"success\"}) / sum(ds_master_quartz_job_executed_total) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Job Successful Rate", + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 139, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ds_master_quartz_job_executed_total{})", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "A" 
}, + { + "exemplar": true, + "expr": "ds_master_quartz_job_executed_total{result=\"success\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Successful", + "refId": "B" + }, + { + "exemplar": true, + "expr": "ds_master_quartz_job_executed_total{result=\"failure\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Failed ({{exception}})", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Quartz Job Executed Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1516", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1517", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 18 + }, + "hiddenSeries": false, + "id": 101, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(ds_master_quartz_job_execution_time_seconds_sum[1m])/rate(ds_master_quartz_job_execution_time_seconds_count[1m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 
1, + "legendFormat": "avg ({{exception}})", + "refId": "A" + }, + { + "exemplar": true, + "expr": "quartz_job_execution_seconds_max", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max ({{exception}})", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Quartz Job Execution Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1671", + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1672", + "format": "short", + "label": "", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 119, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(ds_master_quartz_job_executed_total[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "A" + }, + { + "exemplar": true, + "expr": "increase(ds_master_quartz_job_executed_total{result=\"success\"}[1m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "Successful", + "refId": "B" + }, + { + "exemplar": true, + "expr": "increase(ds_master_quartz_job_executed_total{result=\"failure\"}[1m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Failed ({{exception}})", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Quartz Job Executed Count / Minute", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1516", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1517", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "cards": {}, + "color": { + "cardColor": "#F2495C", + "colorScale": "sqrt", + "colorScheme": "interpolateReds", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 18 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 146, + "legend": { + "show": true + }, + "pluginVersion": "8.2.3", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(ds_master_quartz_job_execution_time_seconds_bucket[5m])) by (le))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Quartz Job Execution Time Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 166, + "panels": [], + "title": "ProcessInstance", + "type": "row" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 152, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"submit\"}[1m]))", + "refId": "A" + } + ], + "title": "Process Instance Submit/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 162, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"finish\"}[1m]))", + "refId": "A" + } + ], + "title": "Process Instance Finish/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 36 + }, + "id": 156, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, 
- "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"success\"}[1m]))", + "refId": "A" + } + ], + "title": "Process Instance Success /1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 12, - "y": 11 - }, - "id": 154, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "expr": "increase(ds_workflow_instance_timeout_count_total{}[1m])", - "refId": "A" - } - ], - "title": "Process Instance Timeout/1m", - "type": 
"timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 36 + }, + "id": 160, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"stop\"}[1m]))", + "refId": "A" + } + ], + "title": "Process Instance Stop/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "color": "red", + "value": 80 } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 36 + }, + "id": 154, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"timeout\"}[1m]))", + "refId": "A" + } + ], + "title": "Process Instance Timeout/1m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 11 - }, - "id": 158, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "expr": "increase(ds_workflow_instance_failure_count_total{}[1m])", - "refId": "A" - } - ], - "title": "Process Instance Failure/1m", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 6, + "x": 18, + "y": 36 + }, + "id": 158, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(increase(ds_workflow_instance_count_total{state=\"fail\"}[1m]))", + "refId": "A" } ], - "title": "ProcessInstance", - "type": "row" + "title": "Process Instance Failure/1m", + "type": "timeseries" }, { "collapsed": false, @@ -1499,7 +1495,7 @@ "h": 1, "w": 24, "x": 0, - "y": 3 + "y": 44 }, "id": 172, "panels": [], @@ -1565,7 +1561,7 @@ "h": 8, "w": 8, "x": 0, - "y": 4 + "y": 45 }, "id": 178, "options": { @@ -1669,7 +1665,7 @@ "h": 8, "w": 8, "x": 8, - "y": 4 + "y": 45 }, "id": 180, "options": { @@ -1689,7 +1685,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_submit_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"submit\"}[1m]))", "refId": "A" } ], @@ -1755,7 +1751,7 @@ "h": 8, "w": 8, "x": 16, - "y": 4 + "y": 45 }, "id": 182, "options": { @@ -1775,7 +1771,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_finish_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"finish\"}[1m]))", "refId": "A" } ], @@ -1841,7 +1837,7 @@ "h": 8, "w": 8, "x": 0, - "y": 12 + "y": 53 }, "id": 184, "options": { @@ -1861,7 +1857,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_success_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"success\"}[1m]))", "refId": "A" } ], @@ -1927,7 +1923,7 @@ "h": 8, "w": 8, "x": 8, - "y": 12 + "y": 53 }, "id": 186, "options": { @@ -1947,7 +1943,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_failure_count_total{}[1m]))", + "expr": 
"sum(increase(ds_task_instance_count_total{state=\"fail\"}[1m]))", "refId": "A" } ], @@ -2013,7 +2009,7 @@ "h": 8, "w": 8, "x": 16, - "y": 12 + "y": 53 }, "id": 188, "options": { @@ -2033,7 +2029,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_timeout_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"timeout\"}[1m]))", "refId": "A" } ], @@ -2099,7 +2095,7 @@ "h": 8, "w": 8, "x": 0, - "y": 20 + "y": 61 }, "id": 190, "options": { @@ -2119,7 +2115,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_retry_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"retry\"}[1m]))", "refId": "A" } ], @@ -2185,7 +2181,7 @@ "h": 8, "w": 8, "x": 8, - "y": 20 + "y": 61 }, "id": 192, "options": { @@ -2205,7 +2201,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_stop_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"retry\"}[1m]))", "refId": "A" } ], @@ -2271,7 +2267,7 @@ "h": 8, "w": 8, "x": 16, - "y": 20 + "y": 61 }, "id": 194, "options": { @@ -2291,7 +2287,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(increase(ds_task_failover_count_total{}[1m]))", + "expr": "sum(increase(ds_task_instance_count_total{state=\"failover\"}[1m]))", "refId": "A" } ], @@ -2308,8 +2304,8 @@ { "current": { "selected": false, - "text": "master-server", - "value": "master-server" + "text": "standalone-server", + "value": "standalone-server" }, "datasource": { "type": "prometheus", @@ -2339,8 +2335,8 @@ "allFormat": "glob", "current": { "selected": false, - "text": "host.docker.internal:5679", - "value": "host.docker.internal:5679" + "text": "host.docker.internal:12345", + "value": "host.docker.internal:12345" }, "datasource": { "type": "prometheus", @@ -2434,7 +2430,7 @@ ] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": { @@ -2466,6 
+2462,6 @@ "timezone": "browser", "title": "Master", "uid": "6XgATOcnz", - "version": 2, + "version": 1, "weekStart": "" } \ No newline at end of file