
Support filtering restricted network interfaces (#14638)

Branch: 3.2.1-prepare
Author: Wenjun Ruan, 11 months ago, committed by GitHub
Commit: 2b99451ccd
Changed files:

1. docs/docs/en/architecture/configuration.md (63 changed lines)
2. docs/docs/zh/architecture/configuration.md (63 changed lines)
3. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (12 changed lines)
4. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java (37 changed lines)
5. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java (10 changed lines)
6. dolphinscheduler-common/src/main/resources/common.properties (3 changed lines)
7. dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java (20 changed lines)

docs/docs/en/architecture/configuration.md (63 changed lines)

@@ -200,37 +200,38 @@ Currently, common.properties mainly configures Hadoop/s3a related configurations
The default configuration is as follows:

| Parameters | Default value | Description |
|--|--|--|
| data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files |
| resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE |
| resource.upload.path | /dolphinscheduler | storage path of resource files |
| aws.access.key.id | minioadmin | access key id of S3 |
| aws.secret.access.key | minioadmin | secret access key of S3 |
| aws.region | us-east-1 | region of S3 |
| aws.s3.endpoint | http://minio:9000 | endpoint of S3 |
| hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS |
| fs.defaultFS | hdfs://mycluster:8020 | if resource.storage.type=S3, the request url would be similar to 's3a://dolphinscheduler'; otherwise, if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into the 'conf' directory |
| hadoop.security.authentication.startup.state | false | whether hadoop grants kerberos permission |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos username |
| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos user keytab |
| kerberos.expire.time | 2 | kerberos expire time, integer, the unit is hour |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | specify the yarn resourcemanager url; if resourcemanager supports HA, input the HA IP addresses (separated by commas), or input null for standalone |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep the default if ResourceManager supports HA or is not used; replace ds1 with the corresponding hostname if ResourceManager runs in standalone mode |
| development.state | false | specify whether in development state |
| dolphin.scheduler.network.interface.preferred | NONE | display name of the network card that will be used |
| dolphin.scheduler.network.interface.restrict | docker0 | display name of the network card that shouldn't be used |
| dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy; gives priority to finding the internal network or the external network |
| resource.manager.httpaddress.port | 8088 | the port of resource manager |
| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn |
| datasource.encryption.enable | false | whether to enable datasource encryption |
| datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality |
| support.hive.oneSession | false | specify whether hive SQL is executed in the same session |
| sudo.enable | true | whether to enable sudo |
| alert.rpc.port | 50052 | the RPC port of Alert Server |
| zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin |
| appId.collect | log | way to collect applicationId; to use aop, change the value from log to aop and uncomment the applicationId auto-collection configuration in `bin/env/dolphinscheduler_env.sh`. Note: the aop way doesn't support submitting yarn jobs on a remote host in client mode (e.g. Beeline), and will fail if the applicationId collection-related environment configuration in dolphinscheduler_env.sh is overridden |
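Taken together, the three `dolphin.scheduler.network.*` settings control how a node resolves its own address: `interface.preferred` pins resolution to one named card, the new `interface.restrict` excludes cards from consideration (defaulting to `docker0`, so a host's Docker bridge is not picked up by mistake), and `priority.strategy` chooses between internal and external IPs among the remaining candidates, as the NetUtils changes below show.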
### Api-server related configuration

docs/docs/zh/architecture/configuration.md (63 changed lines)

@@ -200,37 +200,38 @@ The common.properties file currently mainly configures hadoop/s3/yarn/applicationId collection related settings
The default configuration is as follows:

| Parameters | Default value | Description |
|--|--|--|
| data.basedir.path | /tmp/dolphinscheduler | local working directory, used to store temp files |
| resource.storage.type | NONE | resource file storage type: HDFS, S3, OSS, GCS, ABS, NONE |
| resource.upload.path | /dolphinscheduler | resource file storage path |
| aws.access.key.id | minioadmin | S3 access key |
| aws.secret.access.key | minioadmin | S3 secret access key |
| aws.region | us-east-1 | S3 region |
| aws.s3.endpoint | http://minio:9000 | S3 endpoint address |
| hdfs.root.user | hdfs | if the storage type is HDFS, a user with the corresponding operation permissions needs to be configured |
| fs.defaultFS | hdfs://mycluster:8020 | request address; if resource.storage.type=S3, the value is similar to s3a://dolphinscheduler; if resource.storage.type=HDFS and hadoop is configured with HA, copy the core-site.xml and hdfs-site.xml files into the conf directory |
| hadoop.security.authentication.startup.state | false | whether hadoop has kerberos permission enabled |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos login user |
| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos login user keytab |
| kerberos.expire.time | 2 | kerberos expiration time, integer, in hours |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager address; if resourcemanager has HA enabled, input the HA IP addresses (comma separated); if resourcemanager is a single node, the value can be empty |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | if resourcemanager has HA enabled or resourcemanager is not used, keep the default value; if resourcemanager is a single node, configure ds1 as the hostname of the resourcemanager |
| development.state | false | whether in development mode |
| dolphin.scheduler.network.interface.preferred | NONE | display name of the network card that will be used |
| dolphin.scheduler.network.interface.restrict | NONE | display name of the network card that is forbidden to be used |
| dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy; default prefers the internal network |
| resource.manager.httpaddress.port | 8088 | the port of resource manager |
| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status URL of yarn |
| datasource.encryption.enable | false | whether to enable datasource encryption |
| datasource.encryption.salt | !@#$%^&* | the salt used for datasource encryption |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar used by data quality |
| support.hive.oneSession | false | whether hive SQL is executed in the same session |
| sudo.enable | true | whether to enable sudo |
| alert.rpc.port | 50052 | the RPC port of Alert Server |
| zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin |
| appId.collect | log | way to collect applicationId; to use the aop method, change log to aop and uncomment the applicationId auto-collection environment configuration in `bin/env/dolphinscheduler_env.sh`. Note: aop doesn't support submitting yarn jobs from a remote host, e.g. via the Beeline client; and if the user environment overrides the applicationId collection-related environment configuration in dolphinscheduler_env.sh, the aop method will not work |

## Api-server related configuration

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (12 changed lines)

@@ -661,18 +661,6 @@ public final class Constants {
      */
     public static final String SYSTEM_LINE_SEPARATOR = System.getProperty("line.separator");
 
-    /**
-     * network interface preferred
-     */
-    public static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED =
-            "dolphin.scheduler.network.interface.preferred";
-
-    /**
-     * network IP gets priority, default inner outer
-     */
-    public static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY =
-            "dolphin.scheduler.network.priority.strategy";
-
     /**
      * exec shell scripts
      */

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java (37 changed lines)

@@ -17,8 +17,6 @@
 package org.apache.dolphinscheduler.common.utils;
 
-import org.apache.dolphinscheduler.common.constants.Constants;
-
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.http.conn.util.InetAddressUtils;
@@ -31,20 +29,32 @@ import java.net.NetworkInterface;
 import java.net.SocketException;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 import lombok.extern.slf4j.Slf4j;
 
+import com.google.common.collect.Sets;
+
 /**
  * NetUtils
  */
 @Slf4j
 public class NetUtils {
 
+    private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED =
+            "dolphin.scheduler.network.interface.preferred";
+    private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT =
+            "dolphin.scheduler.network.interface.restrict";
+    private static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY =
+            "dolphin.scheduler.network.priority.strategy";
     private static final String NETWORK_PRIORITY_DEFAULT = "default";
     private static final String NETWORK_PRIORITY_INNER = "inner";
     private static final String NETWORK_PRIORITY_OUTER = "outer";
@@ -214,6 +224,13 @@ public class NetUtils {
             }
         }
 
+        Set<String> restrictNetworkInterfaceName = restrictNetworkInterfaceName();
+        if (CollectionUtils.isNotEmpty(restrictNetworkInterfaceName)) {
+            validNetworkInterfaces = validNetworkInterfaces.stream()
+                    .filter(validNetworkInterface -> !restrictNetworkInterfaceName
+                            .contains(validNetworkInterface.getDisplayName()))
+                    .collect(Collectors.toList());
+        }
         return filterByNetworkPriority(validNetworkInterfaces);
     }
@@ -297,16 +314,24 @@
     }
 
     private static String specifyNetworkInterfaceName() {
-        return PropertyUtils.getString(
-                Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED,
-                System.getProperty(Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED));
+        return PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED,
+                System.getProperty(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED));
+    }
+
+    private static Set<String> restrictNetworkInterfaceName() {
+        return PropertyUtils.getSet(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT, value -> {
+            if (StringUtils.isEmpty(value)) {
+                return Collections.emptySet();
+            }
+            return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet());
+        }, Sets.newHashSet("docker0"));
     }
 
     private static List<NetworkInterface> filterByNetworkPriority(List<NetworkInterface> validNetworkInterfaces) {
         if (CollectionUtils.isEmpty(validNetworkInterfaces)) {
             return Collections.emptyList();
         }
-        String networkPriority = PropertyUtils.getString(Constants.DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY,
+        String networkPriority = PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY,
                 NETWORK_PRIORITY_DEFAULT);
         switch (networkPriority) {
             case NETWORK_PRIORITY_DEFAULT:
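To make the new restriction step easier to follow in isolation, here is a minimal, self-contained sketch of the same filtering idea. The `restricted` set is hard-coded purely for illustration; the real code obtains it from the `dolphin.scheduler.network.interface.restrict` property via `PropertyUtils.getSet`, with `docker0` as the default:

```java
import java.net.NetworkInterface;
import java.net.SocketException;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class RestrictInterfaceSketch {

    public static void main(String[] args) throws SocketException {
        // Illustrative stand-in for the configured restrict list;
        // NetUtils defaults this to {"docker0"}.
        Set<String> restricted = Set.of("docker0");

        List<NetworkInterface> interfaces =
                Collections.list(NetworkInterface.getNetworkInterfaces());

        // Same shape as the new NetUtils filter: drop every interface
        // whose display name appears in the restricted set.
        List<NetworkInterface> kept = interfaces.stream()
                .filter(nic -> !restricted.contains(nic.getDisplayName()))
                .collect(Collectors.toList());

        kept.forEach(nic -> System.out.println(nic.getDisplayName()));
    }
}
```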

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java (10 changed lines)

@@ -23,6 +23,7 @@ import org.apache.dolphinscheduler.common.constants.Constants;
 import org.apache.dolphinscheduler.common.enums.ResUploadType;
 
 import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -30,6 +31,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.function.Function;
 
 import lombok.extern.slf4j.Slf4j;
@@ -294,4 +296,12 @@ public class PropertyUtils {
         });
         return propertiesMap;
     }
+
+    public static <T> Set<T> getSet(String key, Function<String, Set<T>> transformFunction, Set<T> defaultValue) {
+        String value = (String) properties.get(key);
+        if (StringUtils.isEmpty(value)) {
+            return defaultValue;
+        }
+        return transformFunction.apply(value);
+    }
 }
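A hedged usage sketch of the new helper follows; the `worker.groups` key is made up for illustration and is not a real DolphinScheduler property:

```java
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.dolphinscheduler.common.utils.PropertyUtils;

public class GetSetUsageSketch {

    public static void main(String[] args) {
        // Hypothetical entry in common.properties: worker.groups=default, gpu ,etl
        Set<String> groups = PropertyUtils.getSet(
                "worker.groups", // illustrative key only
                value -> Arrays.stream(value.split(","))
                        .map(String::trim)
                        .collect(Collectors.toSet()),
                Collections.emptySet()); // returned when the key is absent or empty
        System.out.println(groups);
    }
}
```

Because `getSet` returns the default for absent or empty values and only then applies the transform, callers don't need their own null checks inside the parsing function.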

dolphinscheduler-common/src/main/resources/common.properties (3 changed lines)

@@ -125,6 +125,9 @@ sudo.enable=true
 # network interface preferred like eth0, default: empty
 #dolphin.scheduler.network.interface.preferred=
 
+# network interface restrict like docker0,docker1, default: docker0
+dolphin.scheduler.network.interface.restrict=docker0
+
 # network IP gets priority, default: inner outer
 #dolphin.scheduler.network.priority.strategy=default
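Because `restrictNetworkInterfaceName()` splits this value on commas and trims each entry, several interfaces can be excluded at once, e.g. `dolphin.scheduler.network.interface.restrict=docker0,docker1`.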

dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java (20 changed lines)

@@ -19,9 +19,18 @@ package org.apache.dolphinscheduler.common.utils;
 
 import org.apache.dolphinscheduler.common.constants.Constants;
 
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Set;
+import java.util.stream.Collectors;
+
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
+import com.google.common.collect.Sets;
+
 public class PropertyUtilsTest {
 
     @Test
@@ -33,4 +42,15 @@ public class PropertyUtilsTest {
     public void getResUploadStartupState() {
         Assertions.assertTrue(PropertyUtils.getResUploadStartupState());
     }
+
+    @Test
+    public void getSet() {
+        Set<String> networkInterface = PropertyUtils.getSet("networkInterface", value -> {
+            if (StringUtils.isEmpty(value)) {
+                return Collections.emptySet();
+            }
+            return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet());
+        }, Sets.newHashSet("docker0"));
+        Assertions.assertEquals(Sets.newHashSet("docker0"), networkInterface);
+    }
 }
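Note that this test passes because the `networkInterface` key is presumably absent from the test's properties file, so `getSet` falls back to the supplied default `Sets.newHashSet("docker0")` rather than exercising the transform function.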
