From 2b99451ccdbb5b2ba0993db49039d4fc48fa0bc4 Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Wed, 26 Jul 2023 23:03:58 +0800 Subject: [PATCH] Support filter restrict network interface (#14638) --- docs/docs/en/architecture/configuration.md | 63 ++++++++++--------- docs/docs/zh/architecture/configuration.md | 63 ++++++++++--------- .../common/constants/Constants.java | 12 ---- .../common/utils/NetUtils.java | 37 +++++++++-- .../common/utils/PropertyUtils.java | 10 +++ .../src/main/resources/common.properties | 3 + .../common/utils/PropertyUtilsTest.java | 20 ++++++ 7 files changed, 128 insertions(+), 80 deletions(-) diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 08b6ac4011..469b44ffdb 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -200,37 +200,38 @@ Currently, common.properties mainly configures Hadoop,s3a related configurations The default configuration is as follows: -| Parameters | Default value | Description | -|--|--|--| -|data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files| -|resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE| -|resource.upload.path | /dolphinscheduler | storage path of resource files| -|aws.access.key.id | minioadmin | access key id of S3| -|aws.secret.access.key | minioadmin | secret access key of S3| -|aws.region | us-east-1 | region of S3| -|aws.s3.endpoint | http://minio:9000 | endpoint of S3| -|hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS| -|fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. 
Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory| -|hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission| -|java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory| -|login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos username| -|login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos user keytab| -|kerberos.expire.time | 2 | kerberos expire time,integer,the unit is hour| -|yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | specify the yarn resourcemanager url. if resourcemanager supports HA, input HA IP addresses (separated by comma), or input null for standalone| -|yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep default if ResourceManager supports HA or not use ResourceManager, or replace ds1 with corresponding hostname if ResourceManager in standalone mode| -|development.state | false | specify whether in development state| -|dolphin.scheduler.network.interface.preferred | NONE | display name of the network card| -|dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy, give priority to finding the internal network or the external network| -|resource.manager.httpaddress.port | 8088 | the port of resource manager| -|yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn| -|datasource.encryption.enable | false | whether to enable datasource encryption| -|datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption| -|data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality| -|support.hive.oneSession | false | specify whether hive SQL is executed in the same session| -|sudo.enable | true | whether to enable sudo| -|alert.rpc.port | 50052 | the RPC port of Alert Server| -|zeppelin.rest.url | http://localhost:8080 | the RESTful API 
url of zeppelin| -|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop, annotation of applicationId auto collection related configuration in `bin/env/dolphinscheduler_env.sh` should be removed. Note: Aop way doesn't support submitting yarn job on remote host by client mode like Beeline, and will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh, and .| +| Parameters | Default value | Description | +|-----------------------------------------------|--------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files | +| resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE | +| resource.upload.path | /dolphinscheduler | storage path of resource files | +| aws.access.key.id | minioadmin | access key id of S3 | +| aws.secret.access.key | minioadmin | secret access key of S3 | +| aws.region | us-east-1 | region of S3 | +| aws.s3.endpoint | http://minio:9000 | endpoint of S3 | +| hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS | +| fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. 
Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory | +| hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission | +| java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory | +| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos username | +| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos user keytab | +| kerberos.expire.time | 2 | kerberos expire time,integer,the unit is hour | +| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | specify the yarn resourcemanager url. if resourcemanager supports HA, input HA IP addresses (separated by comma), or input null for standalone | +| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep default if ResourceManager supports HA or not use ResourceManager, or replace ds1 with corresponding hostname if ResourceManager in standalone mode | +| development.state | false | specify whether in development state | +| dolphin.scheduler.network.interface.preferred | NONE | display name of the network card which will be used | +| dolphin.scheduler.network.interface.restrict | docker0 | display name of the network card which shouldn't be used | +| dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy, give priority to finding the internal network or the external network | +| resource.manager.httpaddress.port | 8088 | the port of resource manager | +| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn | +| datasource.encryption.enable | false | whether to enable datasource encryption | +| datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption | +| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality | +| support.hive.oneSession | false | specify whether hive SQL is executed in the same session 
| +| sudo.enable | true | whether to enable sudo | +| alert.rpc.port | 50052 | the RPC port of Alert Server | +| zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin | +| appId.collect | log | way to collect applicationId. To use aop, change this configuration from log to aop and uncomment the applicationId auto-collection related configuration in `bin/env/dolphinscheduler_env.sh`. Note: the aop way doesn't support submitting yarn jobs on a remote host in client mode (e.g. via Beeline), and it will fail if the user environment overrides the applicationId collection-related environment configuration in dolphinscheduler_env.sh. | ### Api-server related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 5a39ff725c..87bcc882ce 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -200,37 +200,38 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId 默认配置如下: -| 参数 | 默认值 | 描述 | -|--|--|--| -|data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件| -|resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE| -|resource.upload.path | /dolphinscheduler | 资源文件存储路径| -|aws.access.key.id | minioadmin | S3 access key| -|aws.secret.access.key | minioadmin | S3 secret access key| -|aws.region | us-east-1 | S3 区域| -|aws.s3.endpoint | http://minio:9000 | S3 endpoint地址| -|hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户| -|fs.defaultFS | hdfs://mycluster:8020 | 请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 
如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录| -|hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限| -|java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录| -|login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户| -|login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab| -|kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时| -|yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可| -|yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname| -|development.state | false | 是否处于开发模式| -|dolphin.scheduler.network.interface.preferred | NONE | 网卡名称| -|dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网| -|resource.manager.httpaddress.port | 8088 | resource manager的端口| -|yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn的作业历史状态URL| -|datasource.encryption.enable | false | 是否启用datasource 加密| -|datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt| -|data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包| -|support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行| -|sudo.enable | true | 是否开启sudo| -|alert.rpc.port | 50052 | Alert Server的RPC端口| -|zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| -|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| +| 参数 | 默认值 | 描述 | 
+|-----------------------------------------------|--|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 | +| resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE | +| resource.upload.path | /dolphinscheduler | 资源文件存储路径 | +| aws.access.key.id | minioadmin | S3 access key | +| aws.secret.access.key | minioadmin | S3 secret access key | +| aws.region | us-east-1 | S3 区域 | +| aws.s3.endpoint | http://minio:9000 | S3 endpoint地址 | +| hdfs.root.user | hdfs | 如果存储类型为HDFS,需要配置拥有对应操作权限的用户 | +| fs.defaultFS | hdfs://mycluster:8020 | 请求地址如果resource.storage.type=S3,该值类似为: s3a://dolphinscheduler. 如果resource.storage.type=HDFS, 如果 hadoop 配置了 HA,需要复制core-site.xml 和 hdfs-site.xml 文件到conf目录 | +| hadoop.security.authentication.startup.state | false | hadoop是否开启kerberos权限 | +| java.security.krb5.conf.path | /opt/krb5.conf | kerberos配置目录 | +| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos登录用户 | +| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos登录用户keytab | +| kerberos.expire.time | 2 | kerberos过期时间,整数,单位为小时 | +| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager 地址, 如果resourcemanager开启了HA, 输入HA的IP地址(以逗号分隔),如果resourcemanager为单节点, 该值为空即可 | +| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | 如果resourcemanager开启了HA或者没有使用resourcemanager,保持默认值即可. 
如果resourcemanager为单节点,你需要将ds1 配置为resourcemanager对应的hostname | +| development.state | false | 是否处于开发模式 | +| dolphin.scheduler.network.interface.preferred | NONE | 将会被使用的网卡名称 | +| dolphin.scheduler.network.interface.restrict | docker0 | 禁止使用的网卡名称 | +| dolphin.scheduler.network.priority.strategy | default | ip获取策略 default优先获取内网 | +| resource.manager.httpaddress.port | 8088 | resource manager的端口 | +| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn的作业历史状态URL | +| datasource.encryption.enable | false | 是否启用datasource 加密 | +| datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt | +| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 | +| support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 | +| sudo.enable | true | 是否开启sudo | +| alert.rpc.port | 50052 | Alert Server的RPC端口 | +| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址 | +| appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效 | ## Api-server相关配置 diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java index f214e7aea7..f2260c30cf 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java @@ -661,18 +661,6 @@ public final class Constants { */ public static final String SYSTEM_LINE_SEPARATOR = System.getProperty("line.separator"); - /** - * network interface preferred - */ - public static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED = - "dolphin.scheduler.network.interface.preferred"; - - /** - * network IP gets 
priority, default inner outer - */ - public static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY = - "dolphin.scheduler.network.priority.strategy"; - /** * exec shell scripts */ diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java index 161ee4698d..e1d98e169a 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java @@ -17,8 +17,6 @@ package org.apache.dolphinscheduler.common.utils; -import org.apache.dolphinscheduler.common.constants.Constants; - import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.http.conn.util.InetAddressUtils; @@ -31,20 +29,32 @@ import java.net.NetworkInterface; import java.net.SocketException; import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import com.google.common.collect.Sets; + /** * NetUtils */ @Slf4j public class NetUtils { + private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED = + "dolphin.scheduler.network.interface.preferred"; + private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT = + "dolphin.scheduler.network.interface.restrict"; + + private static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY = + "dolphin.scheduler.network.priority.strategy"; + private static final String NETWORK_PRIORITY_DEFAULT = "default"; private static final String NETWORK_PRIORITY_INNER = "inner"; private static final String NETWORK_PRIORITY_OUTER = "outer"; @@ -214,6 +224,13 @@ public class 
NetUtils { } } + Set<String> restrictNetworkInterfaceName = restrictNetworkInterfaceName(); + if (CollectionUtils.isNotEmpty(restrictNetworkInterfaceName)) { + validNetworkInterfaces = validNetworkInterfaces.stream() + .filter(validNetworkInterface -> !restrictNetworkInterfaceName + .contains(validNetworkInterface.getDisplayName())) + .collect(Collectors.toList()); + } return filterByNetworkPriority(validNetworkInterfaces); } @@ -297,16 +314,25 @@ public class NetUtils { } private static String specifyNetworkInterfaceName() { - return PropertyUtils.getString( - Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED, - System.getProperty(Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED)); + return PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED, + System.getProperty(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED)); + } + + /** Parses the comma-separated restricted interface names; {@code docker0} is passed as the default. */ + private static Set<String> restrictNetworkInterfaceName() { + return PropertyUtils.getSet(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT, value -> { + if (StringUtils.isEmpty(value)) { + return Collections.emptySet(); + } + return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet()); + }, Sets.newHashSet("docker0")); } private static List<NetworkInterface> filterByNetworkPriority(List<NetworkInterface> validNetworkInterfaces) { if (CollectionUtils.isEmpty(validNetworkInterfaces)) { return Collections.emptyList(); } - String networkPriority = PropertyUtils.getString(Constants.DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY, + String networkPriority = PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY, NETWORK_PRIORITY_DEFAULT); switch (networkPriority) { case NETWORK_PRIORITY_DEFAULT: diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java index 972f17a8d5..730c446396 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java +++
b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java @@ -23,6 +23,7 @@ import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.io.InputStream; @@ -30,6 +31,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.function.Function; import lombok.extern.slf4j.Slf4j; @@ -294,4 +296,13 @@ public class PropertyUtils { }); return propertiesMap; } + + /** Returns the property {@code key} parsed by {@code transformFunction}, or {@code defaultValue} if empty. */ + public static <T> Set<T> getSet(String key, Function<String, Set<T>> transformFunction, Set<T> defaultValue) { + String value = (String) properties.get(key); + if (StringUtils.isEmpty(value)) { + return defaultValue; + } + return transformFunction.apply(value); + } } diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 3107929d56..43a1338152 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -125,6 +125,9 @@ sudo.enable=true # network interface preferred like eth0, default: empty #dolphin.scheduler.network.interface.preferred= +# network interface restrict like docker0,docker1 , default: docker0 +dolphin.scheduler.network.interface.restrict=docker0 + # network IP gets priority, default: inner outer #dolphin.scheduler.network.priority.strategy=default diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java index 4f25c2077a..493e3b9d08 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java +++
b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java @@ -19,9 +19,18 @@ package org.apache.dolphinscheduler.common.utils; import org.apache.dolphinscheduler.common.constants.Constants; +import org.apache.commons.lang3.StringUtils; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; +import java.util.stream.Collectors; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import com.google.common.collect.Sets; + public class PropertyUtilsTest { @Test @@ -33,4 +42,16 @@ public class PropertyUtilsTest { public void getResUploadStartupState() { Assertions.assertTrue(PropertyUtils.getResUploadStartupState()); } + + /** The key is expected to be absent from the loaded properties, so the default set should be returned. */ + @Test + public void getSet() { + Set<String> networkInterface = PropertyUtils.getSet("networkInterface", value -> { + if (StringUtils.isEmpty(value)) { + return Collections.emptySet(); + } + return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet()); + }, Sets.newHashSet("docker0")); + Assertions.assertEquals(Sets.newHashSet("docker0"), networkInterface); + } }