From 9135b1420e30aedaf3ba49ecff6ef53d8868784b Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Mon, 24 Jun 2019 19:33:17 +0800 Subject: [PATCH 1/6] add resource upload s3 --- .../api/service/ResourcesService.java | 40 ++++----- .../escheduler/api/service/TenantService.java | 4 +- .../api/service/UdfFuncService.java | 12 +-- .../escheduler/api/service/UsersService.java | 6 +- .../java/cn/escheduler/common/Constants.java | 21 ++++- .../common/enums/ResUploadType.java | 29 +++++++ .../escheduler/common/utils/HadoopUtils.java | 87 +++++++++++-------- .../common/utils/PropertyUtils.java | 14 ++- .../main/resources/common/common.properties | 4 +- .../resources/common/hadoop/hadoop.properties | 12 ++- install.sh | 24 +++-- 11 files changed, 168 insertions(+), 85 deletions(-) create mode 100644 escheduler-common/src/main/java/cn/escheduler/common/enums/ResUploadType.java diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/ResourcesService.java b/escheduler-api/src/main/java/cn/escheduler/api/service/ResourcesService.java index b59e43387b..4f2c62271c 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/ResourcesService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/ResourcesService.java @@ -86,8 +86,8 @@ public class ResourcesService extends BaseService { Result result = new Result(); // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -185,9 +185,9 @@ public class ResourcesService extends BaseService { ResourceType type) { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -386,9 +386,9 @@ public class ResourcesService extends BaseService { public Result delete(User loginUser, int resourceId) throws Exception { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -449,9 +449,9 @@ public class ResourcesService extends BaseService { public Result readResource(int resourceId, int skipLineNum, int limit) { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: 
{}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -510,9 +510,9 @@ public class ResourcesService extends BaseService { @Transactional(value = "TransactionManager",rollbackFor = Exception.class) public Result onlineCreateResource(User loginUser, ResourceType type, String fileName, String fileSuffix, String desc, String content) { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -573,9 +573,9 @@ public class ResourcesService extends BaseService { public Result updateResourceContent(int resourceId, String content) { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -663,9 +663,9 @@ public class ResourcesService extends BaseService { * @return */ public org.springframework.core.io.Resource downloadResource(int resourceId) throws Exception { - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); throw new RuntimeException("hdfs not startup"); } diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java b/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java index 68fbc55348..7538071e3a 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java @@ -96,7 +96,7 @@ public class TenantService extends BaseService{ tenantMapper.insert(tenant); // if hdfs startup - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + if (PropertyUtils.getResUploadStartupState()){ String resourcePath = HadoopUtils.getHdfsDataBasePath() + "/" + tenantCode + "/resources"; String udfsPath = HadoopUtils.getHdfsUdfDir(tenantCode); /** @@ -178,7 +178,7 @@ public class TenantService extends BaseService{ Tenant newTenant = tenantMapper.queryByTenantCode(tenantCode); if (newTenant == null){ // if hdfs startup - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + if (PropertyUtils.getResUploadStartupState()){ String resourcePath = HadoopUtils.getHdfsDataBasePath() + "/" + tenantCode + "/resources"; String udfsPath = HadoopUtils.getHdfsUdfDir(tenantCode); //init hdfs resource diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/UdfFuncService.java 
b/escheduler-api/src/main/java/cn/escheduler/api/service/UdfFuncService.java index 52e605f711..8ca399d17f 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/UdfFuncService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/UdfFuncService.java @@ -80,9 +80,9 @@ public class UdfFuncService extends BaseService{ int resourceId) { Result result = new Result(); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } @@ -167,9 +167,9 @@ public class UdfFuncService extends BaseService{ // verify udfFunc is exist UdfFunc udf = udfFuncMapper.queryUdfById(udfFuncId); - // if hdfs not startup - if (!PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ - logger.error("hdfs startup state: {}", PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)); + // if resource upload startup + if (!PropertyUtils.getResUploadStartupState()){ + logger.error("resource upload startup state: {}", PropertyUtils.getResUploadStartupState()); putMsg(result, Status.HDFS_NOT_STARTUP); return result; } diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/UsersService.java b/escheduler-api/src/main/java/cn/escheduler/api/service/UsersService.java index 5db8662c07..c0dab22b69 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/UsersService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/UsersService.java @@ -125,7 +125,7 @@ public class UsersService extends BaseService { Tenant tenant = tenantMapper.queryById(tenantId); // if hdfs startup - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + if (PropertyUtils.getResUploadStartupState()){ String userPath = HadoopUtils.getHdfsDataBasePath() + "/" + tenant.getTenantCode() + "/home/" + user.getId(); HadoopUtils.getInstance().mkdir(userPath); @@ -245,7 +245,7 @@ public class UsersService extends BaseService { Tenant newTenant = tenantMapper.queryById(tenantId); if (newTenant != null) { // if hdfs startup - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + if (PropertyUtils.getResUploadStartupState()){ String newTenantCode = newTenant.getTenantCode(); String oldResourcePath = HadoopUtils.getHdfsDataBasePath() + "/" + oldTenant.getTenantCode() + "/resources"; String oldUdfsPath = HadoopUtils.getHdfsUdfDir(oldTenant.getTenantCode()); @@ -308,7 +308,7 @@ public class UsersService extends BaseService { User user = userMapper.queryTenantCodeByUserId(id); - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + if (PropertyUtils.getResUploadStartupState()){ String userPath = HadoopUtils.getHdfsDataBasePath() + "/" + user.getTenantCode() + "/home/" + id; HadoopUtils.getInstance().delete(userPath, true); diff --git a/escheduler-common/src/main/java/cn/escheduler/common/Constants.java b/escheduler-common/src/main/java/cn/escheduler/common/Constants.java index ddd27170db..30d2e26ded 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/Constants.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/Constants.java @@ -60,6 +60,23 @@ public final class 
Constants { */ public static final String FS_DEFAULTFS = "fs.defaultFS"; + + /** + * fs s3a endpoint + */ + public static final String FS_S3A_ENDPOINT = "fs.s3a.endpoint"; + + /** + * fs s3a access key + */ + public static final String FS_S3A_ACCESS_KEY = "fs.s3a.access.key"; + + /** + * fs s3a secret key + */ + public static final String FS_S3A_SECRET_KEY = "fs.s3a.secret.key"; + + /** * yarn.resourcemanager.ha.rm.idsfs.defaultFS */ @@ -123,9 +140,9 @@ public final class Constants { public static final String DEVELOPMENT_STATE = "development.state"; /** - * hdfs.startup.state + * res.upload.startup.type */ - public static final String HDFS_STARTUP_STATE = "hdfs.startup.state"; + public static final String RES_UPLOAD_STARTUP_TYPE = "res.upload.startup.type"; /** * zookeeper quorum diff --git a/escheduler-common/src/main/java/cn/escheduler/common/enums/ResUploadType.java b/escheduler-common/src/main/java/cn/escheduler/common/enums/ResUploadType.java new file mode 100644 index 0000000000..65d8be8f92 --- /dev/null +++ b/escheduler-common/src/main/java/cn/escheduler/common/enums/ResUploadType.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.common.enums; + +/** + * resource upload types + */ +public enum ResUploadType { + /** + * 0 hdfs + * 1 s3 + * 2 none + */ + HDFS,S3,NONE +}
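One thing worth noting about the enum above: it is resolved with `ResUploadType.valueOf(...)`, which is case-sensitive and throws `IllegalArgumentException` for any value other than HDFS, S3 or NONE, so the `res.upload.startup.type` property must match exactly. A minimal self-contained sketch of the feature gate this enum enables, mirroring the `getResUploadStartupState()` helper added to PropertyUtils further down (the local enum copy is only there to keep the sketch standalone):

```java
public class ResUploadGate {

    enum ResUploadType { HDFS, S3, NONE } // local copy so the sketch compiles on its own

    // mirrors PropertyUtils.getResUploadStartupState() introduced in this patch
    static boolean resUploadEnabled(String property) {
        // valueOf is case-sensitive and throws IllegalArgumentException
        // for anything other than HDFS / S3 / NONE
        ResUploadType type = ResUploadType.valueOf(property);
        return type == ResUploadType.HDFS || type == ResUploadType.S3;
    }

    public static void main(String[] args) {
        System.out.println(resUploadEnabled("NONE")); // false
        System.out.println(resUploadEnabled("S3"));   // true
    }
}
```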
diff --git a/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java b/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java index 85716bec81..6f3e5e2198 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java @@ -18,6 +18,7 @@ package cn.escheduler.common.utils; import cn.escheduler.common.Constants; import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.enums.ResUploadType; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONException; import com.alibaba.fastjson.JSONObject; @@ -40,6 +41,7 @@ import java.util.stream.Stream; import static cn.escheduler.common.Constants.*; import static cn.escheduler.common.utils.PropertyUtils.*; +import static cn.escheduler.common.utils.PropertyUtils.getString; /** * hadoop utils */ @@ -94,48 +96,61 @@ public class HadoopUtils implements Closeable { try { configuration = new Configuration(); - if (getBoolean(Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE)){ - System.setProperty(Constants.JAVA_SECURITY_KRB5_CONF, - getString(Constants.JAVA_SECURITY_KRB5_CONF_PATH)); - configuration.set(Constants.HADOOP_SECURITY_AUTHENTICATION,"kerberos"); - UserGroupInformation.setConfiguration(configuration); - UserGroupInformation.loginUserFromKeytab(getString(Constants.LOGIN_USER_KEY_TAB_USERNAME), - getString(Constants.LOGIN_USER_KEY_TAB_PATH)); - } + String resUploadStartupType = PropertyUtils.getString(Constants.RES_UPLOAD_STARTUP_TYPE); + ResUploadType resUploadType = ResUploadType.valueOf(resUploadStartupType); + + if (resUploadType == ResUploadType.HDFS){ + if (getBoolean(Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE)){ + System.setProperty(Constants.JAVA_SECURITY_KRB5_CONF, + getString(Constants.JAVA_SECURITY_KRB5_CONF_PATH)); + configuration.set(Constants.HADOOP_SECURITY_AUTHENTICATION,"kerberos"); + UserGroupInformation.setConfiguration(configuration); + UserGroupInformation.loginUserFromKeytab(getString(Constants.LOGIN_USER_KEY_TAB_USERNAME), + getString(Constants.LOGIN_USER_KEY_TAB_PATH)); + } - String defaultFS = configuration.get(FS_DEFAULTFS); //first get key from core-site.xml hdfs-site.xml ,if null ,then try to get from properties file - // the default is the local file system - if(defaultFS.startsWith("file")){ - String defaultFSProp = getString(FS_DEFAULTFS); - if(StringUtils.isNotBlank(defaultFSProp)){ - Map fsRelatedProps = getPrefixedProperties("fs."); - configuration.set(FS_DEFAULTFS,defaultFSProp); - fsRelatedProps.entrySet().stream().forEach(entry -> configuration.set(entry.getKey(), entry.getValue())); + String defaultFS = configuration.get(FS_DEFAULTFS); + //first get key from core-site.xml hdfs-site.xml ,if null ,then try to get from properties file + // the default is the local file system + if(defaultFS.startsWith("file")){ + String defaultFSProp = getString(FS_DEFAULTFS); + if(StringUtils.isNotBlank(defaultFSProp)){ + Map fsRelatedProps = getPrefixedProperties("fs."); + configuration.set(FS_DEFAULTFS,defaultFSProp); + fsRelatedProps.entrySet().stream().forEach(entry -> configuration.set(entry.getKey(), entry.getValue())); + }else{ + logger.error("property: {} can not be empty, please set!", FS_DEFAULTFS); + throw new RuntimeException("property: " + FS_DEFAULTFS + " can not be empty, please set!"); + } }else{ - logger.error("property:{} can not to be empty, please set!"); - throw new RuntimeException("property:{} can not to be empty, please set!"); + logger.info("get property:{} -> {}, from core-site.xml hdfs-site.xml ", FS_DEFAULTFS, defaultFS); } - if (fs == null) { - if(StringUtils.isNotEmpty(hdfsUser)){ - //UserGroupInformation ugi = UserGroupInformation.createProxyUser(hdfsUser,UserGroupInformation.getLoginUser()); - UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hdfsUser); - ugi.doAs(new PrivilegedExceptionAction() { - @Override - public Boolean run() throws Exception { - fs = FileSystem.get(configuration); - return true; - } - }); - }else{ - logger.warn("hdfs.root.user is not set value!"); - fs = FileSystem.get(configuration); + if (fs == null) { + if(StringUtils.isNotEmpty(hdfsUser)){ + //UserGroupInformation ugi = UserGroupInformation.createProxyUser(hdfsUser,UserGroupInformation.getLoginUser()); + UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hdfsUser); + ugi.doAs(new PrivilegedExceptionAction() { + @Override + public Boolean run() throws Exception { + fs = FileSystem.get(configuration); + return true; + } + }); + }else{ + logger.warn("hdfs.root.user is not set!"); + fs = FileSystem.get(configuration); + } } + }else if (resUploadType == ResUploadType.S3){ + configuration.set(FS_DEFAULTFS,getString(FS_DEFAULTFS)); + configuration.set(FS_S3A_ENDPOINT,getString(FS_S3A_ENDPOINT)); + configuration.set(FS_S3A_ACCESS_KEY,getString(FS_S3A_ACCESS_KEY)); + configuration.set(FS_S3A_SECRET_KEY,getString(FS_S3A_SECRET_KEY)); + fs = FileSystem.get(configuration); } + + String rmHaIds = getString(YARN_RESOURCEMANAGER_HA_RM_IDS); String appAddress = getString(Constants.YARN_APPLICATION_STATUS_ADDRESS); if (!StringUtils.isEmpty(rmHaIds)) { diff --git a/escheduler-common/src/main/java/cn/escheduler/common/utils/PropertyUtils.java b/escheduler-common/src/main/java/cn/escheduler/common/utils/PropertyUtils.java index f5dab12618..475cbfb72e 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/utils/PropertyUtils.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/utils/PropertyUtils.java @@ -16,6 +16,8 @@ */ package cn.escheduler.common.utils; +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ResUploadType; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,11 +67,15 @@ } } -/* - public static PropertyUtils getInstance(){ - return propertyUtils; + /** + * judge whether resource upload is enabled + * @return + */ + public static Boolean getResUploadStartupState(){ + String resUploadStartupType = PropertyUtils.getString(Constants.RES_UPLOAD_STARTUP_TYPE); + ResUploadType resUploadType = ResUploadType.valueOf(resUploadStartupType); + return resUploadType == ResUploadType.HDFS || resUploadType == ResUploadType.S3; } -*/ /** * get property value
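Taken together, the two hunks above make `res.upload.startup.type` drive both HadoopUtils (which filesystem gets initialized) and the new `PropertyUtils.getResUploadStartupState()` feature gate. The following standalone sketch shows the S3 branch in isolation; it is not part of the patch, it assumes the hadoop-aws module and its SDK dependencies are on the classpath, and the bucket, endpoint and credentials are placeholders:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3ResourceStoreSketch {
    public static void main(String[] args) throws Exception {
        // The same four keys the S3 branch above copies from the properties
        // file into the Hadoop configuration (all values here are placeholders).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "s3a://escheduler");
        conf.set("fs.s3a.endpoint", "http://192.168.xx.xx:9010");
        conf.set("fs.s3a.access.key", "YOUR_ACCESS_KEY");
        conf.set("fs.s3a.secret.key", "YOUR_SECRET_KEY");

        try (FileSystem fs = FileSystem.get(conf)) {
            // resource files then land under the configured base path, e.g. /escheduler
            System.out.println("base path exists: " + fs.exists(new Path("/escheduler")));
        }
    }
}
```

Because S3 is reached through the same Hadoop FileSystem abstraction, the rest of the resource code (mkdir, upload, read, delete) can stay unchanged, which appears to be why the patch only touches initialization.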
diff --git a/escheduler-common/src/main/resources/common/common.properties b/escheduler-common/src/main/resources/common/common.properties index 357ba2cbc6..874d3d0f0b 100644 --- a/escheduler-common/src/main/resources/common/common.properties +++ b/escheduler-common/src/main/resources/common/common.properties @@ -16,8 +16,8 @@ hdfs.root.user=hdfs # data base dir, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/escheduler" is recommended data.store2hdfs.basepath=/escheduler -# whether hdfs starts -hdfs.startup.state=false +# resource upload startup type : HDFS,S3,NONE +res.upload.startup.type=NONE # whether kerberos starts hadoop.security.authentication.startup.state=false diff --git a/escheduler-common/src/main/resources/common/hadoop/hadoop.properties b/escheduler-common/src/main/resources/common/hadoop/hadoop.properties index f210ae7533..81452a83a2 100644 --- a/escheduler-common/src/main/resources/common/hadoop/hadoop.properties +++ b/escheduler-common/src/main/resources/common/hadoop/hadoop.properties @@ -1,6 +1,16 @@ -# ha or single namenode,If namenode ha needs to copy core-site.xml and hdfs-site.xml to the conf directory +# ha or single namenode. For namenode HA, core-site.xml and hdfs-site.xml need to be copied +# to the conf directory. s3 is supported, for example: s3a://escheduler fs.defaultFS=hdfs://mycluster:8020 +# required for s3: the s3 endpoint +fs.s3a.endpoint=http://192.168.199.91:9010 + +# required for s3: the s3 access key +fs.s3a.access.key=A3DXS30FO22544RE + +# required for s3: the s3 secret key +fs.s3a.secret.key=OloCLq3n+8+sdPHUhJ21XrSxTC+JK + #resourcemanager ha note this need ips , this empty if single yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx diff --git a/install.sh b/install.sh index 2245409986..dc26cc59ef 100644 --- a/install.sh +++ b/install.sh @@ -110,14 +110,17 @@ xlsFilePath="/tmp/xls" # whether to start the self-monitoring script monitorServerState="false" -# hadoop configuration -# whether to start hdfs; if started, set it to true and configure the hadoop parameters below; -# if not started, set it to false, and the following configuration needs no change -# special note: to start hdfs you must create the hdfs root path yourself, i.e. hdfsPath in install.sh -hdfsStartupSate="false" +# storage type chosen for resource center uploads: HDFS, S3, NONE +resUploadStartupType="NONE" -# namenode address; HA is supported, in which case core-site.xml and hdfs-site.xml must be placed in the conf directory -namenodeFs="hdfs://mycluster:8020" +# if resUploadStartupType is HDFS, set defaultFS to the namenode address; HA is supported, in which case core-site.xml and hdfs-site.xml must be placed in the conf directory +# if S3, set the S3 address, e.g. s3a://escheduler; note that the root directory /escheduler must be created in advance +defaultFS="hdfs://mycluster:8020" + +# the following settings are required when S3 is configured +s3Endpoint="http://192.168.199.91:9010" +s3AccessKey="A3DXS30FO22544RE" +s3SecretKey="OloCLq3n+8+sdPHUhJ21XrSxTC+JK" # resourcemanager HA configuration; leave this empty for a single resourcemanager yarnHaIps="192.168.xx.xx,192.168.xx.xx" @@ -273,7 +276,10 @@ sed -i ${txt} "s#org.quartz.dataSource.myDs.user.*#org.quartz.dataSource.myDs.user=${mysqlUserName}#g" conf/quartz.properties sed -i ${txt} "s#org.quartz.dataSource.myDs.password.*#org.quartz.dataSource.myDs.password=${mysqlPassword}#g" conf/quartz.properties -sed -i ${txt} "s#fs.defaultFS.*#fs.defaultFS=${namenodeFs}#g" conf/common/hadoop/hadoop.properties +sed -i ${txt} "s#fs.defaultFS.*#fs.defaultFS=${defaultFS}#g" conf/common/hadoop/hadoop.properties +sed -i ${txt} "s#fs.s3a.endpoint.*#fs.s3a.endpoint=${s3Endpoint}#g" conf/common/hadoop/hadoop.properties +sed -i ${txt} "s#fs.s3a.access.key.*#fs.s3a.access.key=${s3AccessKey}#g" conf/common/hadoop/hadoop.properties +sed -i ${txt} "s#fs.s3a.secret.key.*#fs.s3a.secret.key=${s3SecretKey}#g" conf/common/hadoop/hadoop.properties sed -i ${txt} "s#yarn.resourcemanager.ha.rm.ids.*#yarn.resourcemanager.ha.rm.ids=${yarnHaIps}#g" conf/common/hadoop/hadoop.properties sed -i ${txt} "s#yarn.application.status.address.*#yarn.application.status.address=http://${singleYarnIp}:8088/ws/v1/cluster/apps/%s#g" conf/common/hadoop/hadoop.properties @@ -283,7 +289,7 @@ sed -i ${txt} "s#data.download.basedir.path.*#data.download.basedir.path=${downloadPath}#g" conf/common/common.properties sed -i ${txt} "s#process.exec.basepath.*#process.exec.basepath=${execPath}#g" conf/common/common.properties sed -i ${txt} 
"s#hdfs.root.user.*#hdfs.root.user=${hdfsRootUser}#g" conf/common/common.properties sed -i ${txt} "s#data.store2hdfs.basepath.*#data.store2hdfs.basepath=${hdfsPath}#g" conf/common/common.properties -sed -i ${txt} "s#hdfs.startup.state.*#hdfs.startup.state=${hdfsStartupSate}#g" conf/common/common.properties +sed -i ${txt} "s#res.upload.startup.type.*#res.upload.startup.type=${resUploadStartupType}#g" conf/common/common.properties sed -i ${txt} "s#escheduler.env.path.*#escheduler.env.path=${shellEnvPath}#g" conf/common/common.properties sed -i ${txt} "s#resource.view.suffixs.*#resource.view.suffixs=${resSuffixs}#g" conf/common/common.properties sed -i ${txt} "s#development.state.*#development.state=${devState}#g" conf/common/common.properties From 455d82a7400d3f5845f56ce37fb4f16066ed4fef Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Thu, 27 Jun 2019 11:52:39 +0800 Subject: [PATCH 2/6] sqlTask update --- .../main/java/cn/escheduler/server/worker/task/sql/SqlTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java index 4eb567d8c8..dd10d05ddf 100644 --- a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java @@ -374,7 +374,7 @@ public class SqlTask extends AbstractTask { String showTypeName = sqlParameters.getShowType().replace(Constants.COMMA,"").trim(); if(EnumUtils.isValidEnum(ShowType.class,showTypeName)){ Map mailResult = MailUtils.sendMails(receviersList, receviersCcList, title, content, ShowType.valueOf(showTypeName)); - if(!(Boolean) mailResult.get(cn.escheduler.api.utils.Constants.STATUS)){ + if(!(Boolean) mailResult.get(Constants.STATUS)){ throw new RuntimeException("send mail failed!"); } }else{ From 729963fc63caddf5451ad0c2b10ca402375e7779 Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Thu, 27 Jun 2019 13:53:51 +0800 Subject: [PATCH 3/6] tenant service update --- .../main/java/cn/escheduler/api/service/TenantService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java b/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java index ffaa4de7ad..35bb35a2b4 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/TenantService.java @@ -235,8 +235,8 @@ public class TenantService extends BaseService{ return result; } - // if hdfs startup - if (PropertyUtils.getBoolean(cn.escheduler.common.Constants.HDFS_STARTUP_STATE)){ + // if resource upload startup + if (PropertyUtils.getResUploadStartupState()){ String tenantPath = HadoopUtils.getHdfsDataBasePath() + "/" + tenant.getTenantCode(); String resourcePath = HadoopUtils.getHdfsDir(tenant.getTenantCode()); From ce8645bc69b8ef57fc90e60d0e1154444fa1d0f0 Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Thu, 27 Jun 2019 16:09:22 +0800 Subject: [PATCH 4/6] add kerberos interface --- .../api/controller/DataSourceController.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java b/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java index 
5a0b911581..4d1aaa6c28 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java @@ -34,9 +34,11 @@ import org.springframework.http.HttpStatus; import org.springframework.web.bind.annotation.*; import springfox.documentation.annotations.ApiIgnore; +import java.util.HashMap; import java.util.Map; import static cn.escheduler.api.enums.Status.*; +import static cn.escheduler.common.utils.PropertyUtils.getBoolean; /** @@ -429,4 +431,24 @@ public class DataSourceController extends BaseController { return error(AUTHORIZED_DATA_SOURCE.getCode(), AUTHORIZED_DATA_SOURCE.getMsg()); } } + + /** + * get kerberos startup state + * + * @param loginUser + * @return + */ + @ApiOperation(value = "getKerberosStartupState", notes= "GET_USER_INFO_NOTES") + @GetMapping(value="/kerberos-startup-state") + @ResponseStatus(HttpStatus.OK) + public Result getKerberosStartupState(@ApiIgnore @RequestAttribute(value = Constants.SESSION_USER) User loginUser){ + logger.info("login user {},get user info : {}", loginUser.getUserName()); + try{ + Boolean kerberosStartupState = getBoolean(cn.escheduler.common.Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE); + return success(Status.SUCCESS.getMsg(),kerberosStartupState); + }catch (Exception e){ + logger.error(KERBEROS_STARTUP_STATE.getMsg(),e); + return error(Status.KERBEROS_STARTUP_STATE.getCode(), Status.KERBEROS_STARTUP_STATE.getMsg()); + } + } } From a0e1495f27f3ad05ece5167957546932fc0e740f Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Thu, 27 Jun 2019 16:12:04 +0800 Subject: [PATCH 5/6] add kerberos interface --- escheduler-api/src/main/java/cn/escheduler/api/enums/Status.java | 1 + 1 file changed, 1 insertion(+) diff --git a/escheduler-api/src/main/java/cn/escheduler/api/enums/Status.java b/escheduler-api/src/main/java/cn/escheduler/api/enums/Status.java index 7d4a3c6381..bd1d5e91b3 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/enums/Status.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/enums/Status.java @@ -233,6 +233,7 @@ public enum Status { QUEUE_COUNT_ERROR(90001,"queue count error"), + KERBEROS_STARTUP_STATE(100001,"get kerberos startup state error"), ; private int code; From 3f217f5d00f21eac83320d2241448ca49a3701ed Mon Sep 17 00:00:00 2001 From: qiaozhanwei <825193156@qq.com> Date: Fri, 28 Jun 2019 14:39:02 +0800 Subject: [PATCH 6/6] add datasource kerberos auth and FAQ modify --- docs/zh_CN/EasyScheduler-FAQ.md | 267 +++++++++++++++--- .../api/controller/DataSourceController.java | 30 +- .../api/service/DataSourceService.java | 39 ++- .../cn/escheduler/api/utils/CheckUtils.java | 14 + .../cn/escheduler/api/utils/Constants.java | 1 + .../common/job/db/BaseDataSource.java | 20 +- .../common/job/db/HiveDataSource.java | 13 +- .../common/job/db/SparkDataSource.java | 3 +- .../pages/list/_source/createDataSource.vue | 41 ++- .../js/conf/home/store/datasource/actions.js | 9 + .../src/js/module/i18n/locale/en_US.js | 3 +- .../src/js/module/i18n/locale/zh_CN.js | 1 + install.sh | 2 +- 13 files changed, 372 insertions(+), 71 deletions(-) diff --git a/docs/zh_CN/EasyScheduler-FAQ.md b/docs/zh_CN/EasyScheduler-FAQ.md index e9f9d5d7ab..e6af99cae4 100644 --- a/docs/zh_CN/EasyScheduler-FAQ.md +++ b/docs/zh_CN/EasyScheduler-FAQ.md @@ -1,96 +1,283 @@ -Q: Running standalone, the services keep dying, probably from insufficient memory; the test machine is 4 cores / 8G. Production needs to be distributed, but what configuration is recommended for a single machine?
-A: Easy Scheduler is composed of 5 services, and these services themselves do not need much memory or cpu,
-| service | memory | cpu cores |
-| ------------ | ---- | ------- |
-| MasterServer | 2G | 2 cores |
-| WorkerServer | 2G | 2 cores |
-| ApiServer | 512M | 1 core |
-| AlertServer | 512M | 1 core |
-| LoggerServer | 512M | 1 core |
-Note: if there are many tasks, the machine running the WorkerServer should have more than 16G of physical memory

## Q: The EasyScheduler services and the recommended running memory

A: EasyScheduler is composed of the MasterServer, WorkerServer, ApiServer, AlertServer and LoggerServer services, plus the UI.

| service | description |
| ------------------------- | ------------------------------------------------------------ |
| MasterServer | mainly responsible for splitting the **DAG** and monitoring task status |
| WorkerServer/LoggerServer | mainly responsible for submitting and executing tasks and updating task status. The LoggerServer is used by the Rest Api to view logs via **RPC** |
| ApiServer | provides the Rest Api service that the UI calls |
| AlertServer | provides the alert service |
| UI | front-end page display |

Note: **since there are quite a few services, a single-machine deployment should have at least 4 cores and 16G**

---

-Q: Why can't the administrator create a project?
-A: The administrator is currently "pure management", with no tenant, i.e. no corresponding user on linux, so it has no execute permission, but it does have all view permissions. For business operations such as creating projects, use the administrator to create a tenant and an ordinary user, then log in as the ordinary user to operate

## Q: Why can't the administrator create a project

A: The administrator is currently "**pure management**", with no tenant, i.e. no corresponding user on linux, so it has no execute permission **and therefore no projects, resources or data sources of its own**, hence no create permission. **It does have all view permissions.** For business operations such as creating projects, **use the administrator to create a tenant and an ordinary user, then log in as the ordinary user to operate**. We will open up the administrator's create and execute permissions in version 1.1.0, after which the administrator will have all permissions

---

-Q: Which mailboxes does the system support?
-A: The vast majority of mailboxes; qq, 163, 126, 139, outlook, aliyun and so on all work

## Q: Which mailboxes does the system support?

A: The vast majority of mailboxes: qq, 163, 126, 139, outlook, aliyun and so on are all supported. The **TLS and SSL** protocols are supported and can be configured selectively in alert.properties

---

-Q: What are the common system-variable time parameters, and how are they used?
-A: Please refer to the system parameters in the user manual

## Q: What are the common system-variable time parameters, and how are they used?

A: Please refer to https://analysys.github.io/easyscheduler_docs_cn/%E7%B3%BB%E7%BB%9F%E4%BD%BF%E7%94%A8%E6%89%8B%E5%86%8C.html#%E7%B3%BB%E7%BB%9F%E5%8F%82%E6%95%B0

---

-Q: pip install kazoo reports an error during installation. Is it required?
-A: This is what python uses to connect to zookeeper

-Q: If any one of the alert, api or logger services goes down, will everything else still run normally?
-A: No impact; the services that affect running tasks are the Master and Worker services

## Q: pip install kazoo reports an error during installation. Is it required?
+## Q: 流程定义手动启动或调度启动之后,没有流程实例生成 + +A: 1,首先通过**jps 查看MasterServer服务是否存在**,或者从服务监控直接查看zk中是否存在master服务 -A: 这个是python连接zookeeper需要使用到的 +​ 2,如果存在master服务,查看 **命令状态统计** 或者 **t_escheduler_error_command** 中是否增加的新记录,如果增加了,**请查看 message 字段定位启动异常原因** --- -Q: 如果alert、api、logger服务任意一个宕机,任何还会正常执行吧 +## Q : 任务状态一直处于提交成功状态 -A: 不影响,影响正在运行中的任务的服务有Master和Worker服务 +A: 1,首先通过**jps 查看WorkerServer服务是否存在**,或者从服务监控直接查看zk中是否存在worker服务 + +​ 2,如果 **WorkerServer** 服务正常,需要 **查看MasterServer是否把task任务放到zk队列中** ,**需要查看MasterServer日志及zk队列中是否有任务阻塞** + +​ 3,如果以上都没有问题,需要定位是否指定了Worker分组,但是 **Worker分组的机器不是在线状态** --- -Q: 这个怎么指定机器运行任务的啊 」 +## Q: 是否提供Docker镜像及Dockerfile + +A: 提供Docker镜像及Dockerfile。 -A: 通过worker分组: 这个流程只能在指定的机器组里执行。默认是Default,可以在任一worker上执行。 +Docker镜像地址:https://hub.docker.com/r/escheduler/escheduler_images + +Dockerfile地址:https://github.com/qiaozhanwei/escheduler_dockerfile/tree/master/docker_escheduler --- -Q: 跨用户的任务依赖怎么实现呢, 比如A用户写了一个任务,B用户需要依赖这个任务 +## Q : install.sh 中需要注意问题 + +A: 1,如果替换变量中包含特殊字符,**请用 \ 转移符进行转移** + +​ 2,installPath="/data1_1T/escheduler",**这个目录不能和当前要一键安装的install.sh目录是一样的** -就比如说 我们数仓组 写了一个 中间宽表的任务, 其他业务部门想要使用这个中间表的时候,他们应该是另外一个用户,怎么依赖这个中间表呢 +​ 3,deployUser="escheduler",**部署用户必须具有sudo权限**,因为worker是通过sudo -u 租户 sh xxx.command进行执行的 -A: 有两种情况,一个是要运行这个宽表任务,可以使用子工作流把宽表任务放到自己的工作流里面。另一个是检查这个宽表任务有没有完成,可以使用依赖节点来检查这个宽表任务在指定的时间周期有没有完成。 +​ 4,monitorServerState="false",服务监控脚本是否启动,默认是不启动服务监控脚本的。**如果启动服务监控脚本,则每5分钟定时来监控master和worker的服务是否down机,如果down机则会自动重启** + +​ 5,hdfsStartupSate="false",是否开启HDFS资源上传功能。默认是不开启的,**如果不开启则资源中心是不能使用的**。如果开启,需要conf/common/hadoop/hadoop.properties中配置fs.defaultFS和yarn的相关配置,如果使用namenode HA,需要将core-site.xml和hdfs-site.xml复制到conf根目录下 + +​ 注意:**1.0.x版本是不会自动创建hdfs根目录的,需要自行创建,并且需要部署用户有hdfs的操作权限** --- -Q: 启动WorkerServer服务时不能正常启动,报以下信息是什么原因? +## Q : 流程定义和流程实例下线异常 + +A : 对于 **1.0.4 以前的版本中**,修改escheduler-api cn.escheduler.api.quartz包下的代码即可 ``` -[INFO] 2019-05-06 16:39:31.492 cn.escheduler.server.zk.ZKWorkerClient:[155] - register failure , worker already started on : 127.0.0.1, please wait for a moment and try again +public boolean deleteJob(String jobName, String jobGroupName) { + lock.writeLock().lock(); + try { + JobKey jobKey = new JobKey(jobName,jobGroupName); + if(scheduler.checkExists(jobKey)){ + logger.info("try to delete job, job name: {}, job group name: {},", jobName, jobGroupName); + return scheduler.deleteJob(jobKey); + }else { + return true; + } + + } catch (SchedulerException e) { + logger.error(String.format("delete job : %s failed",jobName), e); + } finally { + lock.writeLock().unlock(); + } + return false; + } ``` -A:Worker/Master Server在启动时,会向Zookeeper注册自己的启动信息,是Zookeeper的临时节点,如果两次启动时间间隔较短的情况,上次启动的Worker/Master Server在Zookeeper的会话还未过期,会出现上述信息,处理办法是等待session过期,一般是1分钟左右 +--- ----- +## Q : HDFS启动之前创建的租户,能正常使用资源中心吗 -Q: 编译时escheduler-grpc模块一直报错:Information:java: Errors occurred while compiling module 'escheduler-rpc', 找不到LogParameter、RetStrInfo、RetByteInfo等class类 +A: 不能。因为在未启动HDFS创建的租户,不会在HDFS中注册租户目录。所以上次资源会报错 -A: 这是因为rpc源码包是google Grpc实现的,需要使用maven进行编译,在根目录下执行:mvn -U clean package assembly:assembly -Dmaven.test.skip=true , 然后刷新下整个项目 +## Q : 多Master和多Worker状态下,服务掉了,怎么容错 ----- +A: **注意:Master监控Master及Worker服务。** -Q:EasyScheduler支持windows上运行么? 
+​ 1,如果Master服务掉了,其它的Master会接管挂掉的Master的流程,继续监控Worker task状态 -A: 建议在Ubuntu、Centos上运行,暂不支持windows上运行,不过windows上可以进行编译。开发调试的话建议Ubuntu或者mac上进行。 +​ 2,如果Worker服务掉,Master会监控到Worker服务掉了,如果存在Yarn任务,Kill Yarn任务之后走重试 + +具体请看容错设计:https://analysys.github.io/easyscheduler_docs_cn/%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84%E8%AE%BE%E8%AE%A1.html#%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84%E8%AE%BE%E8%AE%A1 + +--- + +## Q : 对于Master和Worker一台机器伪分布式下的容错 + +A : 1.0.3 版本只实现了Master启动流程容错,不走Worker容错。也就是说如果Worker挂掉的时候,没有Master存在。这流程将会出现问题。我们会在 **1.1.0** 版本中增加Master和Worker启动自容错,修复这个问题。如果想手动修改这个问题,需要针对 **跨重启正在运行流程** **并且已经掉的正在运行的Worker任务,需要修改为失败**,**同时跨重启正在运行流程设置为失败状态**。然后从失败节点进行流程恢复即可 + +--- + +## Q : 定时容易设置成每秒执行 + +A : 设置定时的时候需要注意,如果第一位(* * * * * ? *)设置成 \* ,则表示每秒执行。**我们将会在1.1.0版本中加入显示最近调度的时间列表** ,使用http://cron.qqe2.com/ 可以在线看近5次运行时间 ------ -Q:任务为什么不执行? -A: 不执行的原因: +## Q: 定时有有效时间范围吗 -查看command表里有没有内容? +A:有的,**如果定时的起止时间是同一个时间,那么此定时将是无效的定时**。**如果起止时间的结束时间比当前的时间小,很有可能定时会被自动删除** -查看Master server的运行日志: -查看Worker Server的运行日志 +## Q : 任务依赖有几种实现 +A: 1,**DAG** 之间的任务依赖关系,是从 **入度为零** 进行DAG切分的 + +​ 2,有 **任务依赖节点** ,可以实现跨流程的任务或者流程依赖,具体请参考 依赖(DEPENDENT)节点:https://analysys.github.io/easyscheduler_docs_cn/%E7%B3%BB%E7%BB%9F%E4%BD%BF%E7%94%A8%E6%89%8B%E5%86%8C.html#%E4%BB%BB%E5%8A%A1%E8%8A%82%E7%82%B9%E7%B1%BB%E5%9E%8B%E5%92%8C%E5%8F%82%E6%95%B0%E8%AE%BE%E7%BD%AE + +​ 注意:**不支持跨项目的流程或任务依赖** + +## Q: 流程定义有几种启动方式 + +A: 1,在 **流程定义列表**,点击 **启动** 按钮 + +​ 2,**流程定义列表添加定时器**,调度启动流程定义 + +​ 3,流程定义 **查看或编辑** DAG 页面,任意 **任务节点右击** 启动流程定义 + +​ 4,可以对流程定义 DAG 编辑,设置某些任务的运行标志位 **禁止运行**,则在启动流程定义的时候,将该节点的连线将从DAG中去掉 + +## Q : Python任务设置Python版本 + +A: 1,对于1**.0.3之后的版本**只需要修改 conf/env/.escheduler_env.sh中的PYTHON_HOME + +``` +export PYTHON_HOME=/bin/python +``` + +注意:这了 **PYTHON_HOME** ,是python命令的绝对路径,而不是单纯的 PYTHON_HOME,还需要注意的是 export PATH 的时候,需要直接 + +``` +export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH +``` + +​ 2,对 1.0.3 之前的版本,Python任务只能支持系统的Python版本,不支持指定Python版本 + +## Q: Worker Task 通过sudo -u 租户 sh xxx.command会产生子进程,在kill的时候,是否会杀掉 + +A: 我们会在1.0.4中增加kill任务同时,kill掉任务产生的各种所有子进程 + + + +## Q : EasyScheduler中的队列怎么用,用户队列和租户队列是什么意思 + +A : EasyScheduler 中的队列可以在用户或者租户上指定队列,**用户指定的队列优先级是高于租户队列的优先级的。**,例如:对MR任务指定队列,是通过 mapreduce.job.queuename 来指定队列的。 + +注意:MR在用以上方法指定队列的时候,传递参数请使用如下方式: + +``` + Configuration conf = new Configuration(); + GenericOptionsParser optionParser = new GenericOptionsParser(conf, args); + String[] remainingArgs = optionParser.getRemainingArgs(); +``` + + + +如果是Spark任务 --queue 方式指定队列 + + + +## Q : Master 后者 Worker报如下告警 + +![1560847965302](C:\Users\Administrator\Desktop\FAQ\1560847965302.png) + + + +A : 修改conf下的 master.properties **master.reserved.memory** 的值为更小的值,比如说0.1 或者 + +worker.properties **worker.reserved.memory** 的值为更小的值,比如说0.1 + + + +## Q : hive版本是1.1.0+cdh5.15.0,SQL hive任务连接报错 + +![EF4DA613-5129-4c7a-A0AB-61E5A866A919](C:\Users\Administrator\Desktop\FAQ\EF4DA613-5129-4c7a-A0AB-61E5A866A919.png) + + + +A : 将 hive pom + +``` + + org.apache.hive + hive-jdbc + 2.1.0 + +``` + +修改为 + +``` + + org.apache.hive + hive-jdbc + 1.1.0 + +``` diff --git a/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java b/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java index 4d1aaa6c28..51e0c93bd6 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/controller/DataSourceController.java @@ -18,10 +18,13 @@ package cn.escheduler.api.controller; 
import cn.escheduler.api.enums.Status; import cn.escheduler.api.service.DataSourceService; +import cn.escheduler.api.utils.CheckUtils; import cn.escheduler.api.utils.Constants; import cn.escheduler.api.utils.Result; import cn.escheduler.common.enums.DbType; +import cn.escheduler.common.enums.ResUploadType; import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.common.utils.PropertyUtils; import cn.escheduler.dao.model.User; import io.swagger.annotations.Api; import io.swagger.annotations.ApiImplicitParam; @@ -56,12 +59,16 @@ public class DataSourceController extends BaseController { /** * create data source - * 创建数据源 - * * @param loginUser * @param name * @param note * @param type + * @param host + * @param port + * @param database + * @param principal + * @param userName + * @param password * @param other * @return */ @@ -86,13 +93,14 @@ public class DataSourceController extends BaseController { @RequestParam(value = "host") String host, @RequestParam(value = "port") String port, @RequestParam(value = "database") String database, + @RequestParam(value = "principal") String principal, @RequestParam(value = "userName") String userName, @RequestParam(value = "password") String password, @RequestParam(value = "other") String other) { - logger.info("login user {} create datasource ame: {}, note: {}, type: {}, other: {}", - loginUser.getUserName(), name, note, type, other); + logger.info("login user {} create datasource name: {}, note: {}, type: {}, host: {},port: {},database : {},principal: {},userName : {} other: {}", + loginUser.getUserName(), name, note, type, host,port,database,principal,userName,other); try { - String parameter = dataSourceService.buildParameter(name, note, type, host, port, database, userName, password, other); + String parameter = dataSourceService.buildParameter(name, note, type, host, port, database,principal,userName, password, other); Map result = dataSourceService.createDataSource(loginUser, name, note, type, parameter); return returnDataList(result); @@ -136,13 +144,14 @@ public class DataSourceController extends BaseController { @RequestParam(value = "host") String host, @RequestParam(value = "port") String port, @RequestParam(value = "database") String database, + @RequestParam(value = "principal") String principal, @RequestParam(value = "userName") String userName, @RequestParam(value = "password") String password, @RequestParam(value = "other") String other) { logger.info("login user {} updateProcessInstance datasource name: {}, note: {}, type: {}, other: {}", loginUser.getUserName(), name, note, type, other); try { - String parameter = dataSourceService.buildParameter(name, note, type, host, port, database, userName, password, other); + String parameter = dataSourceService.buildParameter(name, note, type, host, port, database,principal, userName, password, other); Map dataSource = dataSourceService.updateDataSource(id, loginUser, name, note, type, parameter); return returnDataList(dataSource); } catch (Exception e) { @@ -271,13 +280,14 @@ public class DataSourceController extends BaseController { @RequestParam(value = "host") String host, @RequestParam(value = "port") String port, @RequestParam(value = "database") String database, + @RequestParam(value = "principal") String principal, @RequestParam(value = "userName") String userName, @RequestParam(value = "password") String password, @RequestParam(value = "other") String other) { logger.info("login user {}, connect datasource: {} failure, note: {}, type: {}, other: {}", 
loginUser.getUserName(), name, note, type, other); try { - String parameter = dataSourceService.buildParameter(name, note, type, host, port, database, userName, password, other); + String parameter = dataSourceService.buildParameter(name, note, type, host, port, database,principal,userName, password, other); Boolean isConnection = dataSourceService.checkConnection(type, parameter); Result result = new Result(); @@ -442,10 +452,10 @@ public class DataSourceController extends BaseController { @GetMapping(value="/kerberos-startup-state") @ResponseStatus(HttpStatus.OK) public Result getKerberosStartupState(@ApiIgnore @RequestAttribute(value = Constants.SESSION_USER) User loginUser){ - logger.info("login user {},get user info : {}", loginUser.getUserName()); + logger.info("login user {}, get kerberos startup state", loginUser.getUserName()); try{ - Boolean kerberosStartupState = getBoolean(cn.escheduler.common.Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE); - return success(Status.SUCCESS.getMsg(),kerberosStartupState); + // if upload resource is HDFS and kerberos startup is true , else false + return success(Status.SUCCESS.getMsg(), CheckUtils.getKerberosStartupState()); }catch (Exception e){ logger.error(KERBEROS_STARTUP_STATE.getMsg(),e); return error(Status.KERBEROS_STARTUP_STATE.getCode(), Status.KERBEROS_STARTUP_STATE.getMsg()); } diff --git a/escheduler-api/src/main/java/cn/escheduler/api/service/DataSourceService.java b/escheduler-api/src/main/java/cn/escheduler/api/service/DataSourceService.java index 9081a436cf..02164f971b 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/service/DataSourceService.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/service/DataSourceService.java @@ -17,12 +17,15 @@ package cn.escheduler.api.service; import cn.escheduler.api.enums.Status; +import cn.escheduler.api.utils.CheckUtils; import cn.escheduler.api.utils.Constants; import cn.escheduler.api.utils.PageInfo; import cn.escheduler.api.utils.Result; import cn.escheduler.common.enums.DbType; +import cn.escheduler.common.enums.ResUploadType; import cn.escheduler.common.enums.UserType; import cn.escheduler.common.job.db.*; +import cn.escheduler.common.utils.PropertyUtils; import cn.escheduler.dao.mapper.DataSourceMapper; import cn.escheduler.dao.mapper.DatasourceUserMapper; import cn.escheduler.dao.mapper.ProjectMapper; @@ -31,6 +34,8 @@ import cn.escheduler.dao.model.Resource; import cn.escheduler.dao.model.User; import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.TypeReference; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -42,6 +47,9 @@ import java.sql.DriverManager; import java.sql.SQLException; import java.util.*; +import static cn.escheduler.common.utils.PropertyUtils.getBoolean; +import static cn.escheduler.common.utils.PropertyUtils.getString; + /** * datasource service */ @@ -55,6 +63,7 @@ public class DataSourceService extends BaseService{ public static final String TYPE = "type"; public static final String HOST = "host"; public static final String PORT = "port"; + public static final String PRINCIPAL = "principal"; public static final String DATABASE = "database"; public static final String USER_NAME = "userName"; public static final String PASSWORD = "password"; @@ -240,6 +249,7 @@ map.put(TYPE, dataSourceType);
map.put(HOST, host); map.put(PORT, port); + map.put(PRINCIPAL, datasourceForm.getPrincipal()); map.put(DATABASE, database); map.put(USER_NAME, datasourceForm.getUser()); map.put(PASSWORD, datasourceForm.getPassword()); @@ -363,11 +373,21 @@ public class DataSourceService extends BaseService{ Class.forName(Constants.COM_MYSQL_JDBC_DRIVER); break; case HIVE: - datasource = JSONObject.parseObject(parameter, HiveDataSource.class); - Class.forName(Constants.ORG_APACHE_HIVE_JDBC_HIVE_DRIVER); - break; case SPARK: - datasource = JSONObject.parseObject(parameter, SparkDataSource.class); + if (CheckUtils.getKerberosStartupState()) { + System.setProperty(cn.escheduler.common.Constants.JAVA_SECURITY_KRB5_CONF, + getString(cn.escheduler.common.Constants.JAVA_SECURITY_KRB5_CONF_PATH)); + Configuration configuration = new Configuration(); + configuration.set(cn.escheduler.common.Constants.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); + UserGroupInformation.setConfiguration(configuration); + UserGroupInformation.loginUserFromKeytab(getString(cn.escheduler.common.Constants.LOGIN_USER_KEY_TAB_USERNAME), + getString(cn.escheduler.common.Constants.LOGIN_USER_KEY_TAB_PATH)); + } + if (dbType == DbType.HIVE){ + datasource = JSONObject.parseObject(parameter, HiveDataSource.class); + }else if (dbType == DbType.SPARK){ + datasource = JSONObject.parseObject(parameter, SparkDataSource.class); + } Class.forName(Constants.ORG_APACHE_HIVE_JDBC_HIVE_DRIVER); break; case CLICKHOUSE: @@ -443,10 +463,18 @@ public class DataSourceService extends BaseService{ * @param other * @return */ - public String buildParameter(String name, String desc, DbType type, String host, String port, String database, String userName, String password, String other) { + public String buildParameter(String name, String desc, DbType type, String host, + String port, String database,String principal,String userName, + String password, String other) { String address = buildAddress(type, host, port); + String jdbcUrl = address + "/" + database; + if (CheckUtils.getKerberosStartupState() && + (type == DbType.HIVE || type == DbType.SPARK)){ + jdbcUrl += ";principal=" + principal; + } + String separator = ""; if (Constants.MYSQL.equals(type.name()) || Constants.POSTGRESQL.equals(type.name()) @@ -465,6 +493,7 @@ public class DataSourceService extends BaseService{ parameterMap.put(Constants.JDBC_URL, jdbcUrl); parameterMap.put(Constants.USER, userName); parameterMap.put(Constants.PASSWORD, password); + parameterMap.put(Constants.PRINCIPAL,principal); if (other != null && !"".equals(other)) { Map map = JSONObject.parseObject(other, new TypeReference>() { }); diff --git a/escheduler-api/src/main/java/cn/escheduler/api/utils/CheckUtils.java b/escheduler-api/src/main/java/cn/escheduler/api/utils/CheckUtils.java index 6a8c627d7b..f6330b79de 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/utils/CheckUtils.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/utils/CheckUtils.java @@ -18,8 +18,10 @@ package cn.escheduler.api.utils; import cn.escheduler.api.enums.Status; +import cn.escheduler.common.enums.ResUploadType; import cn.escheduler.common.task.AbstractParameters; import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.PropertyUtils; import cn.escheduler.common.utils.TaskParametersUtils; import org.apache.commons.lang.StringUtils; @@ -28,6 +30,7 @@ import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; +import static cn.escheduler.common.utils.PropertyUtils.getBoolean; /** @@ -158,4 +161,15 
@@ public class CheckUtils { return pattern.matcher(str).matches(); } + /** + * if upload resource is HDFS and kerberos startup is true , else false + * @return + */ + public static boolean getKerberosStartupState(){ + String resUploadStartupType = PropertyUtils.getString(cn.escheduler.common.Constants.RES_UPLOAD_STARTUP_TYPE); + ResUploadType resUploadType = ResUploadType.valueOf(resUploadStartupType); + Boolean kerberosStartupState = getBoolean(cn.escheduler.common.Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE); + return resUploadType == ResUploadType.HDFS && kerberosStartupState; + } + } diff --git a/escheduler-api/src/main/java/cn/escheduler/api/utils/Constants.java b/escheduler-api/src/main/java/cn/escheduler/api/utils/Constants.java index 79cf3e5b3f..1dfe3ac470 100644 --- a/escheduler-api/src/main/java/cn/escheduler/api/utils/Constants.java +++ b/escheduler-api/src/main/java/cn/escheduler/api/utils/Constants.java @@ -111,6 +111,7 @@ public class Constants { public static final String ADDRESS = "address"; public static final String DATABASE = "database"; public static final String JDBC_URL = "jdbcUrl"; + public static final String PRINCIPAL = "principal"; public static final String USER = "user"; public static final String PASSWORD = "password"; public static final String OTHER = "other"; diff --git a/escheduler-common/src/main/java/cn/escheduler/common/job/db/BaseDataSource.java b/escheduler-common/src/main/java/cn/escheduler/common/job/db/BaseDataSource.java index af0624091a..f215d3e8c9 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/job/db/BaseDataSource.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/job/db/BaseDataSource.java @@ -45,6 +45,18 @@ public abstract class BaseDataSource { */ private String other; + /** + * principal + */ + private String principal; + + public String getPrincipal() { + return principal; + } + + public void setPrincipal(String principal) { + this.principal = principal; + } /** * test whether the data source can be connected successfully * @throws Exception @@ -73,14 +85,14 @@ public abstract class BaseDataSource { this.password = password; } - public String getAddress() { - return address; - } - public void setAddress(String address) { this.address = address; } + public String getAddress() { + return address; + } + public String getDatabase() { return database; } diff --git a/escheduler-common/src/main/java/cn/escheduler/common/job/db/HiveDataSource.java b/escheduler-common/src/main/java/cn/escheduler/common/job/db/HiveDataSource.java index 28e37991d7..719c5eb300 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/job/db/HiveDataSource.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/job/db/HiveDataSource.java @@ -17,12 +17,12 @@ package cn.escheduler.common.job.db; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; +import java.sql.*; /** * data source of hive @@ -32,6 +32,8 @@ public class HiveDataSource extends BaseDataSource { private static final Logger logger = LoggerFactory.getLogger(HiveDataSource.class); + + /** * gets the JDBC url for the data source connection * @return @@ -43,7 +45,7 @@ public class HiveDataSource extends BaseDataSource { jdbcUrl += "/"; } - jdbcUrl += getDatabase(); + jdbcUrl += getDatabase() + ";principal=" + 
getPrincipal(); if (StringUtils.isNotEmpty(getOther())) { jdbcUrl += ";" + getOther(); @@ -67,11 +69,10 @@ public class HiveDataSource extends BaseDataSource { try { con.close(); } catch (SQLException e) { - logger.error("Postgre datasource try conn close conn error", e); + logger.error("hive datasource try conn close conn error", e); throw e; } } } - } } diff --git a/escheduler-common/src/main/java/cn/escheduler/common/job/db/SparkDataSource.java b/escheduler-common/src/main/java/cn/escheduler/common/job/db/SparkDataSource.java index d9a24eef22..13aa06eaae 100644 --- a/escheduler-common/src/main/java/cn/escheduler/common/job/db/SparkDataSource.java +++ b/escheduler-common/src/main/java/cn/escheduler/common/job/db/SparkDataSource.java @@ -31,7 +31,6 @@ public class SparkDataSource extends BaseDataSource { private static final Logger logger = LoggerFactory.getLogger(SparkDataSource.class); - /** * gets the JDBC url for the data source connection * @return */ @@ -43,7 +42,7 @@ public class SparkDataSource extends BaseDataSource { jdbcUrl += "/"; } - jdbcUrl += getDatabase(); + jdbcUrl += getDatabase() + ";principal=" + getPrincipal(); if (StringUtils.isNotEmpty(getOther())) { jdbcUrl += ";" + getOther(); diff --git a/escheduler-ui/src/js/conf/home/pages/datasource/pages/list/_source/createDataSource.vue b/escheduler-ui/src/js/conf/home/pages/datasource/pages/list/_source/createDataSource.vue index 8b09570862..cc6d25e7de 100644 --- a/escheduler-ui/src/js/conf/home/pages/datasource/pages/list/_source/createDataSource.vue +++ b/escheduler-ui/src/js/conf/home/pages/datasource/pages/list/_source/createDataSource.vue @@ -63,6 +63,17 @@ [the 11 added template lines were stripped during extraction; judging from the rest of the patch, they appear to add the kerberos Principal input to the datasource form]
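To summarize the datasource side of patch 6: on the service side, `buildParameter` only appends `;principal=...` for HIVE/SPARK data sources when `CheckUtils.getKerberosStartupState()` is true, while the `HiveDataSource`/`SparkDataSource.getJdbcUrl()` methods shown above append the principal unconditionally (with kerberos off they would produce a URL ending in `;principal=null`). The following standalone sketch mirrors the guarded service-side behavior; the host, database and principal values are illustrative only:

```java
public class JdbcUrlSketch {

    // Mirrors DataSourceService.buildParameter(): base url is address + "/" + database,
    // and the kerberos principal is only appended when kerberos is actually enabled.
    static String hiveJdbcUrl(String address, String database,
                              String principal, boolean kerberosEnabled) {
        StringBuilder url = new StringBuilder(address);
        if (!address.endsWith("/")) {
            url.append('/');
        }
        url.append(database);
        if (kerberosEnabled && principal != null && !principal.isEmpty()) {
            url.append(";principal=").append(principal);
        }
        return url.toString();
    }

    public static void main(String[] args) {
        System.out.println(hiveJdbcUrl("jdbc:hive2://192.168.xx.xx:10000", "default",
                "hive/192.168.xx.xx@EXAMPLE.COM", true));
        // -> jdbc:hive2://192.168.xx.xx:10000/default;principal=hive/192.168.xx.xx@EXAMPLE.COM
        System.out.println(hiveJdbcUrl("jdbc:hive2://192.168.xx.xx:10000", "default",
                null, false));
        // -> jdbc:hive2://192.168.xx.xx:10000/default
    }
}
```

Guarding the principal in one place, as the service-side code does, keeps non-kerberos deployments unaffected while letting kerberized HIVE/SPARK connections carry the principal the JDBC driver needs.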