
Support filter restrict network interface (#14638)

3.2.1-prepare
Wenjun Ruan committed 11 months ago (via GitHub), commit 2b99451ccd
  1. docs/docs/en/architecture/configuration.md (63 changed lines)
  2. docs/docs/zh/architecture/configuration.md (63 changed lines)
  3. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (12 changed lines)
  4. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java (37 changed lines)
  5. dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java (10 changed lines)
  6. dolphinscheduler-common/src/main/resources/common.properties (3 changed lines)
  7. dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java (20 changed lines)

docs/docs/en/architecture/configuration.md (63 changed lines)

@@ -200,37 +200,38 @@ Currently, common.properties mainly configures Hadoop,s3a related configurations
The default configuration is as follows:
| Parameters | Default value | Description |
|--|--|--|
|data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files|
|resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE|
|resource.upload.path | /dolphinscheduler | storage path of resource files|
|aws.access.key.id | minioadmin | access key id of S3|
|aws.secret.access.key | minioadmin | secret access key of S3|
|aws.region | us-east-1 | region of S3|
|aws.s3.endpoint | http://minio:9000 | endpoint of S3|
|hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS|
|fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory|
|hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission|
|java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory|
|login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos username|
|login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos user keytab|
|kerberos.expire.time | 2 | kerberos expire time, integer, the unit is hours|
|yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | specify the yarn resourcemanager url. if resourcemanager supports HA, input HA IP addresses (separated by comma), or input null for standalone|
|yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep default if ResourceManager supports HA or not use ResourceManager, or replace ds1 with corresponding hostname if ResourceManager in standalone mode|
|development.state | false | specify whether in development state|
|dolphin.scheduler.network.interface.preferred | NONE | display name of the network card|
|dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy: whether to prefer the inner or the outer network|
|resource.manager.httpaddress.port | 8088 | the port of resource manager|
|yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn|
|datasource.encryption.enable | false | whether to enable datasource encryption|
|datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption|
|data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality|
|support.hive.oneSession | false | specify whether hive SQL is executed in the same session|
|sudo.enable | true | whether to enable sudo|
|alert.rpc.port | 50052 | the RPC port of Alert Server|
|zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin|
|appId.collect | log | way to collect applicationId. To use aop, change the configuration from log to aop and uncomment the applicationId auto-collection related configuration in `bin/env/dolphinscheduler_env.sh`. Note: the aop way does not support submitting yarn jobs on a remote host in client mode (e.g. Beeline), and it fails if the applicationId collection-related environment configuration in dolphinscheduler_env.sh is overridden|
| Parameters | Default value | Description |
|-----------------------------------------------|--------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| data.basedir.path | /tmp/dolphinscheduler | local directory used to store temp files |
| resource.storage.type | NONE | type of resource files: HDFS, S3, OSS, GCS, ABS, NONE |
| resource.upload.path | /dolphinscheduler | storage path of resource files |
| aws.access.key.id | minioadmin | access key id of S3 |
| aws.secret.access.key | minioadmin | secret access key of S3 |
| aws.region | us-east-1 | region of S3 |
| aws.s3.endpoint | http://minio:9000 | endpoint of S3 |
| hdfs.root.user | hdfs | configure users with corresponding permissions if storage type is HDFS |
| fs.defaultFS | hdfs://mycluster:8020 | If resource.storage.type=S3, then the request url would be similar to 's3a://dolphinscheduler'. Otherwise if resource.storage.type=HDFS and hadoop supports HA, copy core-site.xml and hdfs-site.xml into 'conf' directory |
| hadoop.security.authentication.startup.state | false | whether hadoop grant kerberos permission |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos username |
| login.user.keytab.path | /opt/hdfs.headless.keytab | kerberos user keytab |
| kerberos.expire.time | 2 | kerberos expire time, integer, the unit is hours |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | specify the yarn resourcemanager url. if resourcemanager supports HA, input HA IP addresses (separated by comma), or input null for standalone |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep default if ResourceManager supports HA or not use ResourceManager, or replace ds1 with corresponding hostname if ResourceManager in standalone mode |
| development.state | false | specify whether in development state |
| dolphin.scheduler.network.interface.preferred | NONE | display name of the network card to use |
| dolphin.scheduler.network.interface.restrict | docker0 | display names of the network cards that must not be used (comma separated) |
| dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy: whether to prefer the inner or the outer network |
| resource.manager.httpaddress.port | 8088 | the port of resource manager |
| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn |
| datasource.encryption.enable | false | whether to enable datasource encryption |
| datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality |
| support.hive.oneSession | false | specify whether hive SQL is executed in the same session |
| sudo.enable | true | whether to enable sudo |
| alert.rpc.port | 50052 | the RPC port of Alert Server |
| zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin |
| appId.collect | log | way to collect applicationId. To use aop, change the configuration from log to aop and uncomment the applicationId auto-collection related configuration in `bin/env/dolphinscheduler_env.sh`. Note: the aop way does not support submitting yarn jobs on a remote host in client mode (e.g. Beeline), and it fails if the applicationId collection-related environment configuration in dolphinscheduler_env.sh is overridden |
### Api-server related configuration
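As a quick illustration of how the two interface parameters above interact, here is a minimal, self-contained sketch (the interface names and the main-method harness are illustrative, not part of this commit): restricted display names are dropped first, mirroring the stream filter this commit adds to NetUtils, and the preferred name, when set, is then chosen from what remains.

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class InterfaceFilterSketch {

    public static void main(String[] args) {
        // Display names reported by the host (illustrative values).
        List<String> interfaces = Arrays.asList("eth0", "docker0", "lo");

        // dolphin.scheduler.network.interface.restrict=docker0
        Set<String> restricted = new HashSet<>(Arrays.asList("docker0"));

        // Exclude every interface whose display name is restricted,
        // as the new NetUtils filter does.
        List<String> usable = interfaces.stream()
                .filter(name -> !restricted.contains(name))
                .collect(Collectors.toList());

        System.out.println(usable); // prints [eth0, lo]
    }
}
```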

docs/docs/zh/architecture/configuration.md (63 changed lines)

@@ -200,37 +200,38 @@ common.properties currently mainly configures hadoop/s3/yarn/applicationId collection related configurations
The default configuration is as follows:
| Parameters | Default value | Description |
|--|--|--|
|data.basedir.path | /tmp/dolphinscheduler | local working directory used to store temp files|
|resource.storage.type | NONE | type of resource file storage: HDFS, S3, OSS, GCS, ABS, NONE|
|resource.upload.path | /dolphinscheduler | storage path of resource files|
|aws.access.key.id | minioadmin | S3 access key|
|aws.secret.access.key | minioadmin | S3 secret access key|
|aws.region | us-east-1 | S3 region|
|aws.s3.endpoint | http://minio:9000 | S3 endpoint|
|hdfs.root.user | hdfs | if the storage type is HDFS, configure a user with the corresponding operation permissions|
|fs.defaultFS | hdfs://mycluster:8020 | request address; if resource.storage.type=S3 the value is similar to s3a://dolphinscheduler; if resource.storage.type=HDFS and hadoop is configured with HA, copy core-site.xml and hdfs-site.xml into the conf directory|
|hadoop.security.authentication.startup.state | false | whether hadoop has kerberos enabled|
|java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory|
|login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos login user|
|login.user.keytab.path | /opt/hdfs.headless.keytab | keytab of the kerberos login user|
|kerberos.expire.time | 2 | kerberos expire time, integer, in hours|
|yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager address; if HA is enabled, enter the HA IP addresses (comma separated); if resourcemanager is a single node, leave this value empty|
|yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep the default if resourcemanager has HA enabled or is not used; if resourcemanager is a single node, replace ds1 with the resourcemanager hostname|
|development.state | false | whether in development mode|
|dolphin.scheduler.network.interface.preferred | NONE | name of the network card|
|dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy; default prefers the inner network|
|resource.manager.httpaddress.port | 8088 | port of the resource manager|
|yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn job history status URL|
|datasource.encryption.enable | false | whether to enable datasource encryption|
|datasource.encryption.salt | !@#$%^&* | salt used for datasource encryption|
|data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | jar used by data quality|
|support.hive.oneSession | false | whether hive SQL is executed in the same session|
|sudo.enable | true | whether to enable sudo|
|alert.rpc.port | 50052 | RPC port of the Alert Server|
|zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API url|
|appId.collect | log | way to collect applicationId. To use aop, change the configuration from log to aop and uncomment the applicationId auto-collection environment variables in `bin/env/dolphinscheduler_env.sh`. Note: aop does not support yarn jobs submitted from a remote host (e.g. via a Beeline client), and it fails if the user environment overrides the applicationId collection-related environment variables in dolphinscheduler_env.sh|
| Parameters | Default value | Description |
|--|--|--|
| data.basedir.path | /tmp/dolphinscheduler | local working directory used to store temp files |
| resource.storage.type | NONE | type of resource file storage: HDFS, S3, OSS, GCS, ABS, NONE |
| resource.upload.path | /dolphinscheduler | storage path of resource files |
| aws.access.key.id | minioadmin | S3 access key |
| aws.secret.access.key | minioadmin | S3 secret access key |
| aws.region | us-east-1 | S3 region |
| aws.s3.endpoint | http://minio:9000 | S3 endpoint |
| hdfs.root.user | hdfs | if the storage type is HDFS, configure a user with the corresponding operation permissions |
| fs.defaultFS | hdfs://mycluster:8020 | request address; if resource.storage.type=S3 the value is similar to s3a://dolphinscheduler; if resource.storage.type=HDFS and hadoop is configured with HA, copy core-site.xml and hdfs-site.xml into the conf directory |
| hadoop.security.authentication.startup.state | false | whether hadoop has kerberos enabled |
| java.security.krb5.conf.path | /opt/krb5.conf | kerberos config directory |
| login.user.keytab.username | hdfs-mycluster@ESZ.COM | kerberos login user |
| login.user.keytab.path | /opt/hdfs.headless.keytab | keytab of the kerberos login user |
| kerberos.expire.time | 2 | kerberos expire time, integer, in hours |
| yarn.resourcemanager.ha.rm.ids | 192.168.xx.xx,192.168.xx.xx | yarn resourcemanager address; if HA is enabled, enter the HA IP addresses (comma separated); if resourcemanager is a single node, leave this value empty |
| yarn.application.status.address | http://ds1:8088/ws/v1/cluster/apps/%s | keep the default if resourcemanager has HA enabled or is not used; if resourcemanager is a single node, replace ds1 with the resourcemanager hostname |
| development.state | false | whether in development mode |
| dolphin.scheduler.network.interface.preferred | NONE | display name of the network card to use |
| dolphin.scheduler.network.interface.restrict | docker0 | display names of the network cards that must not be used |
| dolphin.scheduler.network.priority.strategy | default | IP acquisition strategy; default prefers the inner network |
| resource.manager.httpaddress.port | 8088 | port of the resource manager |
| yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn job history status URL |
| datasource.encryption.enable | false | whether to enable datasource encryption |
| datasource.encryption.salt | !@#$%^&* | salt used for datasource encryption |
| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | jar used by data quality |
| support.hive.oneSession | false | whether hive SQL is executed in the same session |
| sudo.enable | true | whether to enable sudo |
| alert.rpc.port | 50052 | RPC port of the Alert Server |
| zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API url |
| appId.collect | log | way to collect applicationId. To use aop, change the configuration from log to aop and uncomment the applicationId auto-collection environment variables in `bin/env/dolphinscheduler_env.sh`. Note: aop does not support yarn jobs submitted from a remote host (e.g. via a Beeline client), and it fails if the user environment overrides the applicationId collection-related environment variables in dolphinscheduler_env.sh |
## Api-server related configuration

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java (12 changed lines)

@@ -661,18 +661,6 @@ public final class Constants {
*/
public static final String SYSTEM_LINE_SEPARATOR = System.getProperty("line.separator");
/**
* network interface preferred
*/
public static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED =
"dolphin.scheduler.network.interface.preferred";
/**
* network IP gets priority, default inner outer
*/
public static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY =
"dolphin.scheduler.network.priority.strategy";
/**
* exec shell scripts
*/

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/NetUtils.java (37 changed lines)

@@ -17,8 +17,6 @@
package org.apache.dolphinscheduler.common.utils;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.conn.util.InetAddressUtils;
@@ -31,20 +29,32 @@ import java.net.NetworkInterface;
import java.net.SocketException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import com.google.common.collect.Sets;
/**
* NetUtils
*/
@Slf4j
public class NetUtils {
private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED =
"dolphin.scheduler.network.interface.preferred";
private static final String DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT =
"dolphin.scheduler.network.interface.restrict";
private static final String DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY =
"dolphin.scheduler.network.priority.strategy";
private static final String NETWORK_PRIORITY_DEFAULT = "default";
private static final String NETWORK_PRIORITY_INNER = "inner";
private static final String NETWORK_PRIORITY_OUTER = "outer";
@@ -214,6 +224,13 @@ public class NetUtils {
}
}
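// Exclude interfaces whose display name appears in the restrict set.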
Set<String> restrictNetworkInterfaceName = restrictNetworkInterfaceName();
if (CollectionUtils.isNotEmpty(restrictNetworkInterfaceName)) {
validNetworkInterfaces = validNetworkInterfaces.stream()
.filter(validNetworkInterface -> !restrictNetworkInterfaceName
.contains(validNetworkInterface.getDisplayName()))
.collect(Collectors.toList());
}
return filterByNetworkPriority(validNetworkInterfaces);
}
@@ -297,16 +314,24 @@ }
}
private static String specifyNetworkInterfaceName() {
return PropertyUtils.getString(
Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED,
System.getProperty(Constants.DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED));
return PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED,
System.getProperty(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_PREFERRED));
}
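// Display names to exclude, parsed from the comma-separated restrict
// property; falls back to docker0 when the property is unset or blank.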
private static Set<String> restrictNetworkInterfaceName() {
return PropertyUtils.getSet(DOLPHIN_SCHEDULER_NETWORK_INTERFACE_RESTRICT, value -> {
if (StringUtils.isEmpty(value)) {
return Collections.emptySet();
}
return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet());
}, Sets.newHashSet("docker0"));
}
private static List<NetworkInterface> filterByNetworkPriority(List<NetworkInterface> validNetworkInterfaces) {
if (CollectionUtils.isEmpty(validNetworkInterfaces)) {
return Collections.emptyList();
}
String networkPriority = PropertyUtils.getString(Constants.DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY,
String networkPriority = PropertyUtils.getString(DOLPHIN_SCHEDULER_NETWORK_PRIORITY_STRATEGY,
NETWORK_PRIORITY_DEFAULT);
switch (networkPriority) {
case NETWORK_PRIORITY_DEFAULT:
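Since specifyNetworkInterfaceName above falls back to System.getProperty, the preferred interface can also be supplied as a JVM flag rather than in common.properties, e.g. `-Ddolphin.scheduler.network.interface.preferred=eth0` (an illustrative value). The restrict set, by contrast, is read only through PropertyUtils in the code shown here.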

dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/PropertyUtils.java (10 changed lines)

@@ -23,6 +23,7 @@ import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.dolphinscheduler.common.enums.ResUploadType;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.io.InputStream;
@@ -30,6 +31,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import lombok.extern.slf4j.Slf4j;
@@ -294,4 +296,12 @@ public class PropertyUtils {
});
return propertiesMap;
}
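/**
 * Parses the value of {@code key} into a Set via {@code transformFunction},
 * returning {@code defaultValue} when the property is absent or blank.
 */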
public static <T> Set<T> getSet(String key, Function<String, Set<T>> transformFunction, Set<T> defaultValue) {
String value = (String) properties.get(key);
if (StringUtils.isEmpty(value)) {
return defaultValue;
}
return transformFunction.apply(value);
}
}
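A hypothetical use of the new getSet helper with a different element type, showing that the transform function fully controls the resulting Set (the `worker.reserved.ports` key is invented for illustration; imports match those in the test below):

```java
Set<Integer> reservedPorts = PropertyUtils.getSet(
        "worker.reserved.ports", // invented key, for illustration only
        value -> Arrays.stream(value.split(","))
                .map(String::trim)
                .map(Integer::parseInt)
                .collect(Collectors.toSet()),
        Sets.newHashSet(50052)); // default when the key is absent or blank
```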

dolphinscheduler-common/src/main/resources/common.properties (3 changed lines)

@@ -125,6 +125,9 @@ sudo.enable=true
# network interface preferred like eth0, default: empty
#dolphin.scheduler.network.interface.preferred=
# network interface restrict like docker0,docker1, default: docker0
dolphin.scheduler.network.interface.restrict=docker0
# network IP acquisition strategy: default, inner or outer
#dolphin.scheduler.network.priority.strategy=default
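To exclude more than one interface, use a comma-separated list as the comment above suggests, e.g. `dolphin.scheduler.network.interface.restrict=docker0,docker1`; NetUtils trims the whitespace around each entry when parsing the value.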

dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/PropertyUtilsTest.java (20 changed lines)

@@ -19,9 +19,18 @@ package org.apache.dolphinscheduler.common.utils;
import org.apache.dolphinscheduler.common.constants.Constants;
import org.apache.commons.lang3.StringUtils;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Sets;
public class PropertyUtilsTest {
@Test
@@ -33,4 +42,15 @@ public class PropertyUtilsTest {
public void getResUploadStartupState() {
Assertions.assertTrue(PropertyUtils.getResUploadStartupState());
}
@Test
public void getSet() {
Set<String> networkInterface = PropertyUtils.getSet("networkInterface", value -> {
if (StringUtils.isEmpty(value)) {
return Collections.emptySet();
}
return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet());
}, Sets.newHashSet("docker0"));
Assertions.assertEquals(Sets.newHashSet("docker0"), networkInterface);
}
}
