@ -17,11 +17,14 @@
package org.apache.dolphinscheduler.server.worker.metrics ;
package org.apache.dolphinscheduler.server.worker.metrics ;
import java.util.concurrent.TimeUnit ;
import java.util.function.Supplier ;
import java.util.function.Supplier ;
import io.micrometer.core.instrument.Counter ;
import io.micrometer.core.instrument.Counter ;
import io.micrometer.core.instrument.DistributionSummary ;
import io.micrometer.core.instrument.Gauge ;
import io.micrometer.core.instrument.Gauge ;
import io.micrometer.core.instrument.Metrics ;
import io.micrometer.core.instrument.Metrics ;
import io.micrometer.core.instrument.Timer ;
import lombok.experimental.UtilityClass ;
import lombok.experimental.UtilityClass ;
@UtilityClass
@UtilityClass
@ -37,6 +40,33 @@ public class WorkerServerMetrics {
. description ( "full worker submit queues count" )
. description ( "full worker submit queues count" )
. register ( Metrics . globalRegistry ) ;
. register ( Metrics . globalRegistry ) ;
private static final Counter WORKER_RESOURCE_DOWNLOAD_SUCCESS_COUNTER =
Counter . builder ( "ds.worker.resource.download.count" )
. tag ( "status" , "success" )
. description ( "worker resource download success count" )
. register ( Metrics . globalRegistry ) ;
private static final Counter WORKER_RESOURCE_DOWNLOAD_FAILURE_COUNTER =
Counter . builder ( "ds.worker.resource.download.count" )
. tag ( "status" , "fail" )
. description ( "worker resource download failure count" )
. register ( Metrics . globalRegistry ) ;
private static final Timer WORKER_RESOURCE_DOWNLOAD_DURATION_TIMER =
Timer . builder ( "ds.worker.resource.download.duration" )
. publishPercentiles ( 0 . 5 , 0 . 75 , 0 . 95 , 0 . 99 )
. publishPercentileHistogram ( )
. description ( "time cost of resource download on workers" )
. register ( Metrics . globalRegistry ) ;
private static final DistributionSummary WORKER_RESOURCE_DOWNLOAD_SIZE_DISTRIBUTION =
DistributionSummary . builder ( "ds.worker.resource.download.size" )
. baseUnit ( "bytes" )
. publishPercentiles ( 0 . 5 , 0 . 75 , 0 . 95 , 0 . 99 )
. publishPercentileHistogram ( )
. description ( "size of downloaded resource files on worker" )
. register ( Metrics . globalRegistry ) ;
public static void incWorkerOverloadCount ( ) {
public static void incWorkerOverloadCount ( ) {
WORKER_OVERLOAD_COUNTER . increment ( ) ;
WORKER_OVERLOAD_COUNTER . increment ( ) ;
}
}
@ -45,11 +75,26 @@ public class WorkerServerMetrics {
WORKER_SUBMIT_QUEUE_IS_FULL_COUNTER . increment ( ) ;
WORKER_SUBMIT_QUEUE_IS_FULL_COUNTER . increment ( ) ;
}
}
public static void registerWorkerRunningTaskGauge ( Supplier < Number > supplier ) {
public static void incWorkerResourceDownloadSuccessCount ( ) {
WORKER_RESOURCE_DOWNLOAD_SUCCESS_COUNTER . increment ( ) ;
}
public static void incWorkerResourceDownloadFailureCount ( ) {
WORKER_RESOURCE_DOWNLOAD_FAILURE_COUNTER . increment ( ) ;
}
public static void recordWorkerResourceDownloadTime ( final long milliseconds ) {
WORKER_RESOURCE_DOWNLOAD_DURATION_TIMER . record ( milliseconds , TimeUnit . MILLISECONDS ) ;
}
public static void recordWorkerResourceDownloadSize ( final long size ) {
WORKER_RESOURCE_DOWNLOAD_SIZE_DISTRIBUTION . record ( size ) ;
}
public static void registerWorkerRunningTaskGauge ( final Supplier < Number > supplier ) {
Gauge . builder ( "ds.task.running" , supplier )
Gauge . builder ( "ds.task.running" , supplier )
. description ( "number of running tasks on workers" )
. description ( "number of running tasks on workers" )
. register ( Metrics . globalRegistry ) ;
. register ( Metrics . globalRegistry ) ;
}
}
}
}