diff --git a/escheduler-common/src/main/java/cn/escheduler/common/Constants.java b/escheduler-common/src/main/java/cn/escheduler/common/Constants.java
index 07495c5e48..ddd27170db 100644
--- a/escheduler-common/src/main/java/cn/escheduler/common/Constants.java
+++ b/escheduler-common/src/main/java/cn/escheduler/common/Constants.java
@@ -70,6 +70,11 @@ public final class Constants {
      */
     public static final String YARN_APPLICATION_STATUS_ADDRESS = "yarn.application.status.address";
 
+    /**
+     * hdfs configuration
+     * hdfs.root.user
+     */
+    public static final String HDFS_ROOT_USER = "hdfs.root.user";
 
     /**
      * hdfs configuration
diff --git a/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java b/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java
index bedf030e0c..85716bec81 100644
--- a/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java
+++ b/escheduler-common/src/main/java/cn/escheduler/common/utils/HadoopUtils.java
@@ -24,17 +24,15 @@ import com.alibaba.fastjson.JSONObject;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.*;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.client.cli.RMAdminCLI;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.*;
+import java.security.PrivilegedExceptionAction;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -51,18 +49,41 @@ public class HadoopUtils implements Closeable {
 
     private static final Logger logger = LoggerFactory.getLogger(HadoopUtils.class);
 
+    private static String hdfsUser = PropertyUtils.getString(Constants.HDFS_ROOT_USER);
     private static volatile HadoopUtils instance = new HadoopUtils();
     private static volatile Configuration configuration;
     private static FileSystem fs;
 
+
     private HadoopUtils(){
+        if(StringUtils.isEmpty(hdfsUser)){
+            hdfsUser = PropertyUtils.getString(Constants.HDFS_ROOT_USER);
+        }
         init();
+        initHdfsPath();
     }
 
     public static HadoopUtils getInstance(){
         return instance;
     }
 
+    /**
+     * init escheduler root path in hdfs
+     */
+    private void initHdfsPath(){
+        String hdfsPath = getString(Constants.DATA_STORE_2_HDFS_BASEPATH);
+        Path path = new Path(hdfsPath);
+
+        try {
+            if (!fs.exists(path)) {
+                fs.mkdirs(path);
+            }
+        } catch (Exception e) {
+            logger.error(e.getMessage(),e);
+        }
+    }
+
+
     /**
      * init hadoop configuration
      */
@@ -100,7 +121,20 @@ public class HadoopUtils implements Closeable {
         }
 
         if (fs == null) {
-            fs = FileSystem.get(configuration);
+            if(StringUtils.isNotEmpty(hdfsUser)){
+                //UserGroupInformation ugi = UserGroupInformation.createProxyUser(hdfsUser,UserGroupInformation.getLoginUser());
+                UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hdfsUser);
+                ugi.doAs(new PrivilegedExceptionAction<Boolean>() {
+                    @Override
+                    public Boolean run() throws Exception {
+                        fs = FileSystem.get(configuration);
+                        return true;
+                    }
+                });
+            }else{
+                logger.warn("hdfs.root.user is not set, creating FileSystem as the current login user");
+                fs = FileSystem.get(configuration);
+            }
         }
         String rmHaIds = getString(YARN_RESOURCEMANAGER_HA_RM_IDS);
         String appAddress = getString(Constants.YARN_APPLICATION_STATUS_ADDRESS);
diff --git a/escheduler-common/src/main/resources/common/common.properties b/escheduler-common/src/main/resources/common/common.properties
index 6a40a992ee..357ba2cbc6 100644
--- a/escheduler-common/src/main/resources/common/common.properties
+++ b/escheduler-common/src/main/resources/common/common.properties
@@ -10,6 +10,9 @@ data.download.basedir.path=/tmp/escheduler/download
 # process execute directory. self configuration, please make sure the directory exists and have read write permissions
 process.exec.basepath=/tmp/escheduler/exec
 
+# user with permission to create directories under the HDFS root path
+hdfs.root.user=hdfs
+
 # data base dir, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/escheduler" is recommended
 data.store2hdfs.basepath=/escheduler
 
diff --git a/install.sh b/install.sh
index cda6fde520..2245409986 100644
--- a/install.sh
+++ b/install.sh
@@ -128,6 +128,9 @@ singleYarnIp="ark1"
 # hdfs root path; the owner of the root path must be the deployment user. Versions earlier than 1.1.0 do not create the hdfs root directory automatically, so you need to create it yourself
 hdfsPath="/escheduler"
 
+# user that has permission to create directories under the hdfs root path /
+hdfsRootUser="hdfs"
+
 # common configuration
 # program path
 programPath="/tmp/escheduler"
@@ -278,6 +281,7 @@ sed -i ${txt} "s#yarn.application.status.address.*#yarn.application.status.addre
 sed -i ${txt} "s#data.basedir.path.*#data.basedir.path=${programPath}#g" conf/common/common.properties
 sed -i ${txt} "s#data.download.basedir.path.*#data.download.basedir.path=${downloadPath}#g" conf/common/common.properties
 sed -i ${txt} "s#process.exec.basepath.*#process.exec.basepath=${execPath}#g" conf/common/common.properties
+sed -i ${txt} "s#hdfs.root.user.*#hdfs.root.user=${hdfsRootUser}#g" conf/common/common.properties
 sed -i ${txt} "s#data.store2hdfs.basepath.*#data.store2hdfs.basepath=${hdfsPath}#g" conf/common/common.properties
 sed -i ${txt} "s#hdfs.startup.state.*#hdfs.startup.state=${hdfsStartupSate}#g" conf/common/common.properties
 sed -i ${txt} "s#escheduler.env.path.*#escheduler.env.path=${shellEnvPath}#g" conf/common/common.properties
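The mechanism at the heart of this patch is Hadoop's UserGroupInformation.createRemoteUser(...).doAs(...) pattern: the FileSystem handle is created inside doAs(), so it carries the identity of the configured hdfs.root.user (asserted via simple authentication, no Kerberos credentials required), and directories created through it under the HDFS root are owned by that user. Below is a minimal standalone sketch of the same pattern, assuming simple authentication; the class name and the hard-coded "hdfs" and "/escheduler" values are illustrative, not part of the patch.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.UserGroupInformation;

    import java.security.PrivilegedExceptionAction;

    public class HdfsRootUserSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // act as the privileged HDFS user (the value of hdfs.root.user)
            UserGroupInformation ugi = UserGroupInformation.createRemoteUser("hdfs");
            // a FileSystem obtained inside doAs() is bound to that user's identity
            FileSystem fs = ugi.doAs(
                    (PrivilegedExceptionAction<FileSystem>) () -> FileSystem.get(conf));
            Path root = new Path("/escheduler");
            if (!fs.exists(root)) {
                fs.mkdirs(root); // created and owned by user "hdfs"
            }
        }
    }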