Browse Source

Merge pull request #2943 from Eights-Li/dev-sqoop-optimization

Sqoop task optimization
pull/2/head
Rubik-W 5 years ago committed by GitHub
parent
commit
a7fd0a523c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/enums/Status.java
  2. 41
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/SqoopJobType.java
  3. 99
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/task/sqoop/SqoopParameters.java
  4. 2
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/task/sqoop/targets/TargetMysqlParameter.java
  5. 32
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumer.java
  6. 39
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/CommonGenerator.java
  7. 22
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/SqoopJobGenerator.java
  8. 14
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/sources/MysqlSourceGenerator.java
  9. 2
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/targets/MysqlTargetGenerator.java
  10. 126
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/task/sqoop/SqoopTaskTest.java
  11. 2
      dolphinscheduler-ui/src/js/conf/home/pages/dag/_source/formModel/tasks/_source/datasource.vue
  12. 962
      dolphinscheduler-ui/src/js/conf/home/pages/dag/_source/formModel/tasks/sqoop.vue
  13. 7
      dolphinscheduler-ui/src/js/module/i18n/locale/en_US.js
  14. 7
      dolphinscheduler-ui/src/js/module/i18n/locale/zh_CN.js
  15. 1
      pom.xml

2
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/enums/Status.java

@ -218,7 +218,7 @@ public enum Status {
DATA_IS_NOT_VALID(50017,"data {0} not valid", "数据[{0}]无效"),
DATA_IS_NULL(50018,"data {0} is null", "数据[{0}]不能为空"),
PROCESS_NODE_HAS_CYCLE(50019,"process node has cycle", "流程节点间存在循环依赖"),
PROCESS_NODE_S_PARAMETER_INVALID(50020,"process node %s parameter invalid", "流程节点[%s]参数无效"),
PROCESS_NODE_S_PARAMETER_INVALID(50020,"process node {0} parameter invalid", "流程节点[{0}]参数无效"),
PROCESS_DEFINE_STATE_ONLINE(50021, "process definition {0} is already on line", "工作流定义[{0}]已上线"),
DELETE_PROCESS_DEFINE_BY_ID_ERROR(50022,"delete process definition by id error", "删除工作流定义错误"),
SCHEDULE_CRON_STATE_ONLINE(50023,"the status of schedule {0} is already on line", "调度配置[{0}]已上线"),

41
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/SqoopJobType.java

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.common.enums;
import com.baomidou.mybatisplus.annotation.EnumValue;
/**
 * Kinds of sqoop job a sqoop task can run.
 */
public enum SqoopJobType {

    /** custom sqoop job: code 0, description "CUSTOM" */
    CUSTOM(0, "CUSTOM"),

    /** sqoop template job: code 1, description "TEMPLATE" */
    TEMPLATE(1, "TEMPLATE");

    /**
     * numeric code of the job type; carries the MyBatis-Plus {@code @EnumValue} marker
     */
    @EnumValue
    private final int code;

    /**
     * textual description of the job type
     */
    private final String descp;

    /**
     * @param jobCode  numeric code of the job type
     * @param jobDescp textual description of the job type
     */
    SqoopJobType(int jobCode, String jobDescp) {
        code = jobCode;
        descp = jobDescp;
    }

    /**
     * @return the numeric code of this job type
     */
    public int getCode() {
        return this.code;
    }

    /**
     * @return the textual description of this job type
     */
    public String getDescp() {
        return this.descp;
    }
}

99
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/task/sqoop/SqoopParameters.java

@ -16,6 +16,8 @@
*/
package org.apache.dolphinscheduler.common.task.sqoop;
import org.apache.dolphinscheduler.common.enums.SqoopJobType;
import org.apache.dolphinscheduler.common.process.Property;
import org.apache.dolphinscheduler.common.process.ResourceInfo;
import org.apache.dolphinscheduler.common.task.AbstractParameters;
import org.apache.dolphinscheduler.common.utils.StringUtils;
@ -28,6 +30,23 @@ import java.util.List;
*/
public class SqoopParameters extends AbstractParameters {
/**
* sqoop job type:
* CUSTOM - custom sqoop job
* TEMPLATE - sqoop template job
*/
private String jobType;
/**
* custom shell script, used when jobType is CUSTOM
*/
private String customShell;
/**
* sqoop job name - map-reduce job name
*/
private String jobName;
/**
* model type
*/
@ -53,6 +72,16 @@ public class SqoopParameters extends AbstractParameters {
*/
private String targetParams;
/**
* hadoop custom param for sqoop job
*/
private List<Property> hadoopCustomParams;
/**
* sqoop advanced param
*/
private List<Property> sqoopAdvancedParams;
public String getModelType() {
return modelType;
}
@ -101,18 +130,74 @@ public class SqoopParameters extends AbstractParameters {
this.targetParams = targetParams;
}
/**
 * @return the sqoop job type (CUSTOM - custom sqoop job, TEMPLATE - sqoop template job)
 */
public String getJobType() {
    return this.jobType;
}

/**
 * @param jobType the sqoop job type to set
 */
public void setJobType(String jobType) {
    this.jobType = jobType;
}

/**
 * @return the sqoop job name (map-reduce job name)
 */
public String getJobName() {
    return this.jobName;
}

/**
 * @param jobName the sqoop job name to set
 */
public void setJobName(String jobName) {
    this.jobName = jobName;
}

/**
 * @return the custom shell script of the sqoop job
 */
public String getCustomShell() {
    return this.customShell;
}

/**
 * @param customShell the custom shell script to set
 */
public void setCustomShell(String customShell) {
    this.customShell = customShell;
}

/**
 * @return the hadoop custom params for the sqoop job
 */
public List<Property> getHadoopCustomParams() {
    return this.hadoopCustomParams;
}

/**
 * @param hadoopCustomParams the hadoop custom params to set
 */
public void setHadoopCustomParams(List<Property> hadoopCustomParams) {
    this.hadoopCustomParams = hadoopCustomParams;
}

/**
 * @return the sqoop advanced params
 */
public List<Property> getSqoopAdvancedParams() {
    return this.sqoopAdvancedParams;
}

/**
 * @param sqoopAdvancedParams the sqoop advanced params to set
 */
public void setSqoopAdvancedParams(List<Property> sqoopAdvancedParams) {
    this.sqoopAdvancedParams = sqoopAdvancedParams;
}
@Override
public boolean checkParameters() {
return StringUtils.isNotEmpty(modelType)&&
concurrency != 0 &&
StringUtils.isNotEmpty(sourceType)&&
StringUtils.isNotEmpty(targetType)&&
StringUtils.isNotEmpty(sourceParams)&&
StringUtils.isNotEmpty(targetParams);
boolean sqoopParamsCheck = false;
if (StringUtils.isEmpty(jobType)) {
return sqoopParamsCheck;
}
if (SqoopJobType.TEMPLATE.getDescp().equals(jobType)) {
sqoopParamsCheck = StringUtils.isEmpty(customShell) &&
StringUtils.isNotEmpty(modelType) &&
StringUtils.isNotEmpty(jobName) &&
concurrency != 0 &&
StringUtils.isNotEmpty(sourceType) &&
StringUtils.isNotEmpty(targetType) &&
StringUtils.isNotEmpty(sourceParams) &&
StringUtils.isNotEmpty(targetParams);
} else if (SqoopJobType.CUSTOM.getDescp().equals(jobType)) {
sqoopParamsCheck = StringUtils.isNotEmpty(customShell) &&
StringUtils.isEmpty(jobName);
}
return sqoopParamsCheck;
}
@Override
public List<ResourceInfo> getResourceFilesList() {
return new ArrayList<>();
return new ArrayList<>();
}
}

2
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/task/sqoop/targets/TargetMysqlParameter.java

@ -106,7 +106,7 @@ public class TargetMysqlParameter {
this.preQuery = preQuery;
}
public boolean isUpdate() {
public boolean getIsUpdate() {
return isUpdate;
}

32
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumer.java

@ -19,6 +19,7 @@ package org.apache.dolphinscheduler.server.master.consumer;
import com.alibaba.fastjson.JSONObject;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.SqoopJobType;
import org.apache.dolphinscheduler.common.enums.TaskType;
import org.apache.dolphinscheduler.common.enums.UdfType;
import org.apache.dolphinscheduler.common.model.TaskNode;
@ -258,29 +259,32 @@ public class TaskPriorityQueueConsumer extends Thread{
/**
* set datax task relation
* set sqoop task relation
* @param sqoopTaskExecutionContext sqoopTaskExecutionContext
* @param taskNode taskNode
*/
private void setSqoopTaskRelation(SqoopTaskExecutionContext sqoopTaskExecutionContext, TaskNode taskNode) {
SqoopParameters sqoopParameters = JSONObject.parseObject(taskNode.getParams(), SqoopParameters.class);
SourceMysqlParameter sourceMysqlParameter = JSONUtils.parseObject(sqoopParameters.getSourceParams(), SourceMysqlParameter.class);
TargetMysqlParameter targetMysqlParameter = JSONUtils.parseObject(sqoopParameters.getTargetParams(), TargetMysqlParameter.class);
// sqoop job type is template set task relation
if (sqoopParameters.getJobType().equals(SqoopJobType.TEMPLATE.getDescp())) {
SourceMysqlParameter sourceMysqlParameter = JSONUtils.parseObject(sqoopParameters.getSourceParams(), SourceMysqlParameter.class);
TargetMysqlParameter targetMysqlParameter = JSONUtils.parseObject(sqoopParameters.getTargetParams(), TargetMysqlParameter.class);
DataSource dataSource = processService.findDataSourceById(sourceMysqlParameter.getSrcDatasource());
DataSource dataTarget = processService.findDataSourceById(targetMysqlParameter.getTargetDatasource());
DataSource dataSource = processService.findDataSourceById(sourceMysqlParameter.getSrcDatasource());
DataSource dataTarget = processService.findDataSourceById(targetMysqlParameter.getTargetDatasource());
if (dataSource != null){
sqoopTaskExecutionContext.setDataSourceId(dataSource.getId());
sqoopTaskExecutionContext.setSourcetype(dataSource.getType().getCode());
sqoopTaskExecutionContext.setSourceConnectionParams(dataSource.getConnectionParams());
}
if (dataSource != null){
sqoopTaskExecutionContext.setDataSourceId(dataSource.getId());
sqoopTaskExecutionContext.setSourcetype(dataSource.getType().getCode());
sqoopTaskExecutionContext.setSourceConnectionParams(dataSource.getConnectionParams());
}
if (dataTarget != null){
sqoopTaskExecutionContext.setDataTargetId(dataTarget.getId());
sqoopTaskExecutionContext.setTargetType(dataTarget.getType().getCode());
sqoopTaskExecutionContext.setTargetConnectionParams(dataTarget.getConnectionParams());
if (dataTarget != null){
sqoopTaskExecutionContext.setDataTargetId(dataTarget.getId());
sqoopTaskExecutionContext.setTargetType(dataTarget.getType().getCode());
sqoopTaskExecutionContext.setTargetConnectionParams(dataTarget.getConnectionParams());
}
}
}

39
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/CommonGenerator.java

@ -16,10 +16,17 @@
*/
package org.apache.dolphinscheduler.server.worker.task.sqoop.generator;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.process.Property;
import org.apache.dolphinscheduler.common.task.sqoop.SqoopParameters;
import org.apache.dolphinscheduler.common.utils.CollectionUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
* common script generator
*/
@ -32,6 +39,38 @@ public class CommonGenerator {
try{
result.append("sqoop ")
.append(sqoopParameters.getModelType());
//set sqoop job name
result.append(" -D mapred.job.name")
.append(Constants.EQUAL_SIGN)
.append(sqoopParameters.getJobName());
//set hadoop custom param
List<Property> hadoopCustomParams = sqoopParameters.getHadoopCustomParams();
if (CollectionUtils.isNotEmpty(hadoopCustomParams)) {
for (Property hadoopCustomParam : hadoopCustomParams) {
String hadoopCustomParamStr = " -D " + hadoopCustomParam.getProp()
+ Constants.EQUAL_SIGN + hadoopCustomParam.getValue();
if (StringUtils.isNotEmpty(hadoopCustomParamStr)) {
result.append(hadoopCustomParamStr);
}
}
}
//set sqoop advanced custom param
List<Property> sqoopAdvancedParams = sqoopParameters.getSqoopAdvancedParams();
if (CollectionUtils.isNotEmpty(sqoopAdvancedParams)) {
for (Property sqoopAdvancedParam : sqoopAdvancedParams) {
String sqoopAdvancedParamStr = " " + sqoopAdvancedParam.getProp()
+ " " + sqoopAdvancedParam.getValue();
if (StringUtils.isNotEmpty(sqoopAdvancedParamStr)) {
result.append(sqoopAdvancedParamStr);
}
}
}
if(sqoopParameters.getConcurrency() >0){
result.append(" -m ")
.append(sqoopParameters.getConcurrency());

22
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/SqoopJobGenerator.java

@ -16,6 +16,7 @@
*/
package org.apache.dolphinscheduler.server.worker.task.sqoop.generator;
import org.apache.dolphinscheduler.common.enums.SqoopJobType;
import org.apache.dolphinscheduler.common.task.sqoop.SqoopParameters;
import org.apache.dolphinscheduler.server.entity.TaskExecutionContext;
import org.apache.dolphinscheduler.server.worker.task.sqoop.generator.sources.HdfsSourceGenerator;
@ -62,14 +63,23 @@ public class SqoopJobGenerator {
* @return
*/
public String generateSqoopJob(SqoopParameters sqoopParameters,TaskExecutionContext taskExecutionContext){
createSqoopJobGenerator(sqoopParameters.getSourceType(),sqoopParameters.getTargetType());
if(sourceGenerator == null || targetGenerator == null){
return null;
String sqoopScripts = "";
if (SqoopJobType.TEMPLATE.getDescp().equals(sqoopParameters.getJobType())) {
createSqoopJobGenerator(sqoopParameters.getSourceType(),sqoopParameters.getTargetType());
if(sourceGenerator == null || targetGenerator == null){
throw new RuntimeException("sqoop task source type or target type is null");
}
sqoopScripts = commonGenerator.generate(sqoopParameters)
+ sourceGenerator.generate(sqoopParameters,taskExecutionContext)
+ targetGenerator.generate(sqoopParameters,taskExecutionContext);
} else if (SqoopJobType.CUSTOM.getDescp().equals(sqoopParameters.getJobType())) {
sqoopScripts = sqoopParameters.getCustomShell().replaceAll("\\r\\n", "\n");
}
return commonGenerator.generate(sqoopParameters)
+ sourceGenerator.generate(sqoopParameters,taskExecutionContext)
+ targetGenerator.generate(sqoopParameters,taskExecutionContext);
return sqoopScripts;
}
/**

14
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/sources/MysqlSourceGenerator.java

@ -77,19 +77,19 @@ public class MysqlSourceGenerator implements ISourceGenerator {
}else{
srcQuery += " WHERE $CONDITIONS";
}
result.append(" --query \'"+srcQuery+"\'");
result.append(" --query \'").append(srcQuery).append("\'");
}
List<Property> mapColumnHive = sourceMysqlParameter.getMapColumnHive();
if(mapColumnHive != null && !mapColumnHive.isEmpty()){
String columnMap = "";
StringBuilder columnMap = new StringBuilder();
for(Property item:mapColumnHive){
columnMap = item.getProp()+"="+ item.getValue()+",";
columnMap.append(item.getProp()).append("=").append(item.getValue()).append(",");
}
if(StringUtils.isNotEmpty(columnMap)){
if(StringUtils.isNotEmpty(columnMap.toString())){
result.append(" --map-column-hive ")
.append(columnMap.substring(0,columnMap.length()-1));
}
@ -98,12 +98,12 @@ public class MysqlSourceGenerator implements ISourceGenerator {
List<Property> mapColumnJava = sourceMysqlParameter.getMapColumnJava();
if(mapColumnJava != null && !mapColumnJava.isEmpty()){
String columnMap = "";
StringBuilder columnMap = new StringBuilder();
for(Property item:mapColumnJava){
columnMap = item.getProp()+"="+ item.getValue()+",";
columnMap.append(item.getProp()).append("=").append(item.getValue()).append(",");
}
if(StringUtils.isNotEmpty(columnMap)){
if(StringUtils.isNotEmpty(columnMap.toString())){
result.append(" --map-column-java ")
.append(columnMap.substring(0,columnMap.length()-1));
}

2
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/sqoop/generator/targets/MysqlTargetGenerator.java

@ -78,7 +78,7 @@ public class MysqlTargetGenerator implements ITargetGenerator {
result.append(" --lines-terminated-by '").append(targetMysqlParameter.getLinesTerminated()).append("'");
}
if(targetMysqlParameter.isUpdate()
if(targetMysqlParameter.getIsUpdate()
&& StringUtils.isNotEmpty(targetMysqlParameter.getTargetUpdateKey())
&& StringUtils.isNotEmpty(targetMysqlParameter.getTargetUpdateMode())){
result.append(" --update-key ").append(targetMysqlParameter.getTargetUpdateKey())

126
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/task/sqoop/SqoopTaskTest.java

@ -17,11 +17,9 @@
package org.apache.dolphinscheduler.server.worker.task.sqoop;
import com.alibaba.fastjson.JSON;
import org.apache.dolphinscheduler.common.enums.DbType;
import org.apache.dolphinscheduler.common.task.sqoop.SqoopParameters;
import org.apache.dolphinscheduler.dao.entity.DataSource;
import org.apache.dolphinscheduler.server.entity.SqoopTaskExecutionContext;
import org.apache.dolphinscheduler.server.entity.TaskExecutionContext;
import org.apache.dolphinscheduler.server.worker.task.TaskProps;
import org.apache.dolphinscheduler.server.worker.task.sqoop.generator.SqoopJobGenerator;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.apache.dolphinscheduler.service.process.ProcessService;
@ -35,7 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.ApplicationContext;
import java.util.*;
import java.util.Date;
/**
* sqoop task test
@ -52,64 +50,98 @@ public class SqoopTaskTest {
@Before
public void before() throws Exception{
processService = Mockito.mock(ProcessService.class);
Mockito.when(processService.findDataSourceById(2)).thenReturn(getDataSource());
applicationContext = Mockito.mock(ApplicationContext.class);
SpringApplicationContext springApplicationContext = new SpringApplicationContext();
springApplicationContext.setApplicationContext(applicationContext);
Mockito.when(applicationContext.getBean(ProcessService.class)).thenReturn(processService);
TaskProps props = new TaskProps();
props.setTaskAppId(String.valueOf(System.currentTimeMillis()));
props.setTenantCode("1");
props.setEnvFile(".dolphinscheduler_env.sh");
props.setTaskStartTime(new Date());
props.setTaskTimeout(0);
props.setTaskParams("{\"concurrency\":1,\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HIVE\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\",\\\"srcQueryType\\\":\\\"1\\\",\\\"srcQuerySql\\\":\\\"SELECT * FROM person_2\\\",\\\"srcColumnType\\\":\\\"0\\\",\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[],\\\"mapColumnJava\\\":[{\\\"prop\\\":\\\"id\\\",\\\"direct\\\":\\\"IN\\\",\\\"type\\\":\\\"VARCHAR\\\",\\\"value\\\":\\\"Integer\\\"}]}\",\"targetParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal_2\\\",\\\"createHiveTable\\\":true,\\\"dropDelimiter\\\":false,\\\"hiveOverWrite\\\":true,\\\"replaceDelimiter\\\":\\\"\\\",\\\"hivePartitionKey\\\":\\\"date\\\",\\\"hivePartitionValue\\\":\\\"2020-02-16\\\"}\",\"localParams\":[]}");
sqoopTask = new SqoopTask(new TaskExecutionContext(),logger);
TaskExecutionContext taskExecutionContext = new TaskExecutionContext();
taskExecutionContext.setTaskAppId(String.valueOf(System.currentTimeMillis()));
taskExecutionContext.setTenantCode("1");
taskExecutionContext.setEnvFile(".dolphinscheduler_env.sh");
taskExecutionContext.setStartTime(new Date());
taskExecutionContext.setTaskTimeout(0);
taskExecutionContext.setTaskParams("{\"jobName\":\"sqoop_import\",\"jobType\":\"TEMPLATE\",\"concurrency\":1," +
"\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HIVE\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\"," +
"\\\"srcQueryType\\\":\\\"1\\\",\\\"srcQuerySql\\\":\\\"SELECT * FROM person_2\\\",\\\"srcColumnType\\\":\\\"0\\\"," +
"\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[]," +
"\\\"mapColumnJava\\\":[{\\\"prop\\\":\\\"id\\\",\\\"direct\\\":\\\"IN\\\",\\\"type\\\":\\\"VARCHAR\\\",\\\"value\\\":\\\"Integer\\\"}]}\"" +
",\"targetParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal_2\\\",\\\"createHiveTable\\\":true," +
"\\\"dropDelimiter\\\":false,\\\"hiveOverWrite\\\":true,\\\"replaceDelimiter\\\":\\\"\\\",\\\"hivePartitionKey\\\":\\\"date\\\"," +
"\\\"hivePartitionValue\\\":\\\"2020-02-16\\\"}\",\"localParams\":[]}");
sqoopTask = new SqoopTask(taskExecutionContext,logger);
//test sqoop task init method
sqoopTask.init();
}
/**
* test SqoopJobGenerator
*/
@Test
public void testGenerator(){
String data1 = "{\"concurrency\":1,\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HDFS\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\",\\\"srcQueryType\\\":\\\"0\\\",\\\"srcQuerySql\\\":\\\"\\\",\\\"srcColumnType\\\":\\\"0\\\",\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[],\\\"mapColumnJava\\\":[]}\",\"targetParams\":\"{\\\"targetPath\\\":\\\"/ods/tmp/test/person7\\\",\\\"deleteTargetDir\\\":true,\\\"fileType\\\":\\\"--as-textfile\\\",\\\"compressionCodec\\\":\\\"\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters sqoopParameters1 = JSON.parseObject(data1,SqoopParameters.class);
TaskExecutionContext mysqlTaskExecutionContext = getMysqlTaskExecutionContext();
//sqoop TEMPLATE job
//import mysql to HDFS with hadoop custom params and sqoop advanced params
String mysqlToHdfs = "{\"jobName\":\"sqoop_import\",\"hadoopCustomParams\":[{\"prop\":\"mapreduce.map.memory.mb\",\"direct\":\"IN\",\"type\":\"VARCHAR\",\"value\":\"4096\"}],\"sqoopAdvancedParams\":[{\"prop\":\"--direct\",\"direct\":\"IN\",\"type\":\"VARCHAR\",\"value\":\"\"}]," +
"\"jobType\":\"TEMPLATE\",\"concurrency\":1,\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HDFS\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\",\\\"srcQueryType\\\":\\\"0\\\",\\\"srcQuerySql\\\":\\\"\\\",\\\"srcColumnType\\\":\\\"0\\\",\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[],\\\"mapColumnJava\\\":[]}\",\"targetParams\":\"{\\\"targetPath\\\":\\\"/ods/tmp/test/person7\\\",\\\"deleteTargetDir\\\":true,\\\"fileType\\\":\\\"--as-textfile\\\",\\\"compressionCodec\\\":\\\"\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters mysqlToHdfsParams = JSON.parseObject(mysqlToHdfs,SqoopParameters.class);
SqoopJobGenerator generator = new SqoopJobGenerator();
String script = generator.generateSqoopJob(sqoopParameters1,new TaskExecutionContext());
String expected = "sqoop import -m 1 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_2 --target-dir /ods/tmp/test/person7 --as-textfile --delete-target-dir --fields-terminated-by '@' --lines-terminated-by '\\n' --null-non-string 'NULL' --null-string 'NULL'";
Assert.assertEquals(expected, script);
String data2 = "{\"concurrency\":1,\"modelType\":\"export\",\"sourceType\":\"HDFS\",\"targetType\":\"MYSQL\",\"sourceParams\":\"{\\\"exportDir\\\":\\\"/ods/tmp/test/person7\\\"}\",\"targetParams\":\"{\\\"targetDatasource\\\":2,\\\"targetTable\\\":\\\"person_3\\\",\\\"targetColumns\\\":\\\"id,name,age,sex,create_time\\\",\\\"preQuery\\\":\\\"\\\",\\\"isUpdate\\\":true,\\\"targetUpdateKey\\\":\\\"id\\\",\\\"targetUpdateMode\\\":\\\"allowinsert\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters sqoopParameters2 = JSON.parseObject(data2,SqoopParameters.class);
String script2 = generator.generateSqoopJob(sqoopParameters2,new TaskExecutionContext());
String expected2 = "sqoop export -m 1 --export-dir /ods/tmp/test/person7 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_3 --columns id,name,age,sex,create_time --fields-terminated-by '@' --lines-terminated-by '\\n' --update-key id --update-mode allowinsert";
Assert.assertEquals(expected2, script2);
String data3 = "{\"concurrency\":1,\"modelType\":\"export\",\"sourceType\":\"HIVE\",\"targetType\":\"MYSQL\",\"sourceParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal\\\",\\\"hivePartitionKey\\\":\\\"date\\\",\\\"hivePartitionValue\\\":\\\"2020-02-17\\\"}\",\"targetParams\":\"{\\\"targetDatasource\\\":2,\\\"targetTable\\\":\\\"person_3\\\",\\\"targetColumns\\\":\\\"\\\",\\\"preQuery\\\":\\\"\\\",\\\"isUpdate\\\":false,\\\"targetUpdateKey\\\":\\\"\\\",\\\"targetUpdateMode\\\":\\\"allowinsert\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters sqoopParameters3 = JSON.parseObject(data3,SqoopParameters.class);
String script3 = generator.generateSqoopJob(sqoopParameters3,new TaskExecutionContext());
String expected3 = "sqoop export -m 1 --hcatalog-database stg --hcatalog-table person_internal --hcatalog-partition-keys date --hcatalog-partition-values 2020-02-17 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_3 --fields-terminated-by '@' --lines-terminated-by '\\n'";
Assert.assertEquals(expected3, script3);
String data4 = "{\"concurrency\":1,\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HIVE\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\",\\\"srcQueryType\\\":\\\"1\\\",\\\"srcQuerySql\\\":\\\"SELECT * FROM person_2\\\",\\\"srcColumnType\\\":\\\"0\\\",\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[],\\\"mapColumnJava\\\":[{\\\"prop\\\":\\\"id\\\",\\\"direct\\\":\\\"IN\\\",\\\"type\\\":\\\"VARCHAR\\\",\\\"value\\\":\\\"Integer\\\"}]}\",\"targetParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal_2\\\",\\\"createHiveTable\\\":true,\\\"dropDelimiter\\\":false,\\\"hiveOverWrite\\\":true,\\\"replaceDelimiter\\\":\\\"\\\",\\\"hivePartitionKey\\\":\\\"date\\\",\\\"hivePartitionValue\\\":\\\"2020-02-16\\\"}\",\"localParams\":[]}";
SqoopParameters sqoopParameters4 = JSON.parseObject(data4,SqoopParameters.class);
String script4 = generator.generateSqoopJob(sqoopParameters4,new TaskExecutionContext());
String expected4 = "sqoop import -m 1 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --query 'SELECT * FROM person_2 WHERE $CONDITIONS' --map-column-java id=Integer --hive-import --hive-table stg.person_internal_2 --create-hive-table --hive-overwrite -delete-target-dir --hive-partition-key date --hive-partition-value 2020-02-16";
Assert.assertEquals(expected4, script4);
String mysqlToHdfsScript = generator.generateSqoopJob(mysqlToHdfsParams,mysqlTaskExecutionContext);
String mysqlToHdfsExpected = "sqoop import -D mapred.job.name=sqoop_import -D mapreduce.map.memory.mb=4096 --direct -m 1 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_2 --target-dir /ods/tmp/test/person7 --as-textfile --delete-target-dir --fields-terminated-by '@' --lines-terminated-by '\\n' --null-non-string 'NULL' --null-string 'NULL'";
Assert.assertEquals(mysqlToHdfsExpected, mysqlToHdfsScript);
//export hdfs to mysql using update mode
String hdfsToMysql = "{\"jobName\":\"sqoop_import\",\"jobType\":\"TEMPLATE\",\"concurrency\":1,\"modelType\":\"export\",\"sourceType\":\"HDFS\"," +
"\"targetType\":\"MYSQL\",\"sourceParams\":\"{\\\"exportDir\\\":\\\"/ods/tmp/test/person7\\\"}\"," +
"\"targetParams\":\"{\\\"targetDatasource\\\":2,\\\"targetTable\\\":\\\"person_3\\\",\\\"targetColumns\\\":\\\"id,name,age,sex,create_time\\\",\\\"preQuery\\\":\\\"\\\",\\\"isUpdate\\\":true,\\\"targetUpdateKey\\\":\\\"id\\\",\\\"targetUpdateMode\\\":\\\"allowinsert\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters hdfsToMysqlParams = JSON.parseObject(hdfsToMysql,SqoopParameters.class);
String hdfsToMysqlScript = generator.generateSqoopJob(hdfsToMysqlParams,mysqlTaskExecutionContext);
String hdfsToMysqlScriptExpected = "sqoop export -D mapred.job.name=sqoop_import -m 1 --export-dir /ods/tmp/test/person7 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_3 --columns id,name,age,sex,create_time --fields-terminated-by '@' --lines-terminated-by '\\n' --update-key id --update-mode allowinsert";
Assert.assertEquals(hdfsToMysqlScriptExpected, hdfsToMysqlScript);
//export hive to mysql
String hiveToMysql = "{\"jobName\":\"sqoop_import\",\"jobType\":\"TEMPLATE\",\"concurrency\":1,\"modelType\":\"export\",\"sourceType\":\"HIVE\",\"targetType\":\"MYSQL\",\"sourceParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal\\\",\\\"hivePartitionKey\\\":\\\"date\\\",\\\"hivePartitionValue\\\":\\\"2020-02-17\\\"}\",\"targetParams\":\"{\\\"targetDatasource\\\":2,\\\"targetTable\\\":\\\"person_3\\\",\\\"targetColumns\\\":\\\"\\\",\\\"preQuery\\\":\\\"\\\",\\\"isUpdate\\\":false,\\\"targetUpdateKey\\\":\\\"\\\",\\\"targetUpdateMode\\\":\\\"allowinsert\\\",\\\"fieldsTerminated\\\":\\\"@\\\",\\\"linesTerminated\\\":\\\"\\\\\\\\n\\\"}\",\"localParams\":[]}";
SqoopParameters hiveToMysqlParams = JSON.parseObject(hiveToMysql,SqoopParameters.class);
String hiveToMysqlScript = generator.generateSqoopJob(hiveToMysqlParams,mysqlTaskExecutionContext);
String hiveToMysqlExpected = "sqoop export -D mapred.job.name=sqoop_import -m 1 --hcatalog-database stg --hcatalog-table person_internal --hcatalog-partition-keys date --hcatalog-partition-values 2020-02-17 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --table person_3 --fields-terminated-by '@' --lines-terminated-by '\\n'";
Assert.assertEquals(hiveToMysqlExpected, hiveToMysqlScript);
//import mysql to hive
String mysqlToHive = "{\"jobName\":\"sqoop_import\",\"jobType\":\"TEMPLATE\",\"concurrency\":1,\"modelType\":\"import\",\"sourceType\":\"MYSQL\",\"targetType\":\"HIVE\",\"sourceParams\":\"{\\\"srcDatasource\\\":2,\\\"srcTable\\\":\\\"person_2\\\",\\\"srcQueryType\\\":\\\"1\\\",\\\"srcQuerySql\\\":\\\"SELECT * FROM person_2\\\",\\\"srcColumnType\\\":\\\"0\\\",\\\"srcColumns\\\":\\\"\\\",\\\"srcConditionList\\\":[],\\\"mapColumnHive\\\":[],\\\"mapColumnJava\\\":[{\\\"prop\\\":\\\"id\\\",\\\"direct\\\":\\\"IN\\\",\\\"type\\\":\\\"VARCHAR\\\",\\\"value\\\":\\\"Integer\\\"}]}\",\"targetParams\":\"{\\\"hiveDatabase\\\":\\\"stg\\\",\\\"hiveTable\\\":\\\"person_internal_2\\\",\\\"createHiveTable\\\":true,\\\"dropDelimiter\\\":false,\\\"hiveOverWrite\\\":true,\\\"replaceDelimiter\\\":\\\"\\\",\\\"hivePartitionKey\\\":\\\"date\\\",\\\"hivePartitionValue\\\":\\\"2020-02-16\\\"}\",\"localParams\":[]}";
SqoopParameters mysqlToHiveParams = JSON.parseObject(mysqlToHive,SqoopParameters.class);
String mysqlToHiveScript = generator.generateSqoopJob(mysqlToHiveParams,mysqlTaskExecutionContext);
String mysqlToHiveExpected = "sqoop import -D mapred.job.name=sqoop_import -m 1 --connect jdbc:mysql://192.168.0.111:3306/test --username kylo --password 123456 --query 'SELECT * FROM person_2 WHERE $CONDITIONS' --map-column-java id=Integer --hive-import --hive-table stg.person_internal_2 --create-hive-table --hive-overwrite -delete-target-dir --hive-partition-key date --hive-partition-value 2020-02-16";
Assert.assertEquals(mysqlToHiveExpected, mysqlToHiveScript);
//sqoop CUSTOM job
String sqoopCustomString = "{\"jobType\":\"CUSTOM\",\"localParams\":[],\"customShell\":\"sqoop import\"}";
SqoopParameters sqoopCustomParams = JSON.parseObject(sqoopCustomString, SqoopParameters.class);
String sqoopCustomScript = generator.generateSqoopJob(sqoopCustomParams, new TaskExecutionContext());
String sqoopCustomExpected = "sqoop import";
Assert.assertEquals(sqoopCustomExpected, sqoopCustomScript);
}
private DataSource getDataSource() {
DataSource dataSource = new DataSource();
dataSource.setType(DbType.MYSQL);
dataSource.setConnectionParams(
"{\"address\":\"jdbc:mysql://192.168.0.111:3306\",\"database\":\"test\",\"jdbcUrl\":\"jdbc:mysql://192.168.0.111:3306/test\",\"user\":\"kylo\",\"password\":\"123456\"}");
dataSource.setUserId(1);
return dataSource;
/**
 * Builds a TaskExecutionContext that carries a SqoopTaskExecutionContext
 * populated with MySQL connection parameters for both the source and the target side.
 *
 * @return TaskExecutionContext holding the populated sqoop context
 */
private TaskExecutionContext getMysqlTaskExecutionContext() {
    // source and target use the same connection JSON
    String mysqlSourceConnectionParams = "{\"address\":\"jdbc:mysql://192.168.0.111:3306\",\"database\":\"test\",\"jdbcUrl\":\"jdbc:mysql://192.168.0.111:3306/test\",\"user\":\"kylo\",\"password\":\"123456\"}";
    String mysqlTargetConnectionParams = "{\"address\":\"jdbc:mysql://192.168.0.111:3306\",\"database\":\"test\",\"jdbcUrl\":\"jdbc:mysql://192.168.0.111:3306/test\",\"user\":\"kylo\",\"password\":\"123456\"}";

    SqoopTaskExecutionContext sqoopContext = new SqoopTaskExecutionContext();

    // source side: datasource id 2, type code 0 (presumably MySQL - confirm against DbType)
    sqoopContext.setDataSourceId(2);
    sqoopContext.setSourcetype(0);
    sqoopContext.setSourceConnectionParams(mysqlSourceConnectionParams);

    // target side: datasource id 2, type code 0
    sqoopContext.setDataTargetId(2);
    sqoopContext.setTargetType(0);
    sqoopContext.setTargetConnectionParams(mysqlTargetConnectionParams);

    TaskExecutionContext context = new TaskExecutionContext();
    context.setSqoopTaskExecutionContext(sqoopContext);
    return context;
}
@Test

2
dolphinscheduler-ui/src/js/conf/home/pages/dag/_source/formModel/tasks/_source/datasource.vue

@ -131,7 +131,7 @@
},
created () {
let supportType = this.supportType || []
this.typeList = _.cloneDeep(this.store.state.dag.dsTypeListS)
this.typeList = this.data.typeList || _.cloneDeep(this.store.state.dag.dsTypeListS)
// Have a specified data source
if (supportType.length) {
let is = (type) => {

962
dolphinscheduler-ui/src/js/conf/home/pages/dag/_source/formModel/tasks/sqoop.vue

File diff suppressed because it is too large Load Diff

7
dolphinscheduler-ui/src/js/module/i18n/locale/en_US.js

@ -540,6 +540,9 @@ export default {
'Whether directory': 'Whether directory',
Yes: 'Yes',
No: 'No',
'Hadoop Custom Params': 'Hadoop Params',
'Sqoop Advanced Parameters': 'Sqoop Params',
'Sqoop Job Name': 'Job Name',
'Please enter Mysql Database(required)': 'Please enter Mysql Database(required)',
'Please enter Mysql Table(required)': 'Please enter Mysql Table(required)',
'Please enter Columns (Comma separated)': 'Please enter Columns (Comma separated)',
@ -554,6 +557,8 @@ export default {
'Please enter Lines Terminated': 'Please enter Lines Terminated',
'Please enter Concurrency': 'Please enter Concurrency',
'Please enter Update Key': 'Please enter Update Key',
'Please enter Job Name(required)': 'Please enter Job Name(required)',
'Please enter Custom Shell(required)': 'Please enter Custom Shell(required)',
Direct: 'Direct',
Type: 'Type',
ModelType: 'ModelType',
@ -587,6 +592,8 @@ export default {
'All Columns': 'All Columns',
'Some Columns': 'Some Columns',
'Branch flow': 'Branch flow',
'Custom Job': 'Custom Job',
'Custom Script': 'Custom Script',
'Cannot select the same node for successful branch flow and failed branch flow': 'Cannot select the same node for successful branch flow and failed branch flow',
'Successful branch flow and failed branch flow are required': 'Successful branch flow and failed branch flow are required',
'Unauthorized or deleted resources': 'Unauthorized or deleted resources',

7
dolphinscheduler-ui/src/js/module/i18n/locale/zh_CN.js

@ -540,6 +540,9 @@ export default {
'Whether directory': '是否文件夹',
Yes: '是',
No: '否',
'Hadoop Custom Params': 'Hadoop参数',
'Sqoop Advanced Parameters': 'Sqoop参数',
'Sqoop Job Name': '任务名称',
'Please enter Mysql Database(required)': '请输入Mysql数据库(必填)',
'Please enter Mysql Table(required)': '请输入Mysql表名(必填)',
'Please enter Columns (Comma separated)': '请输入列名 , 隔开',
@ -554,6 +557,8 @@ export default {
'Please enter Lines Terminated': '请输入行分隔符',
'Please enter Concurrency': '请输入并发度',
'Please enter Update Key': '请输入更新列',
'Please enter Job Name(required)': '请输入任务名称(必填)',
'Please enter Custom Shell(required)': '请输入自定义脚本(必填)',
Direct: '流向',
Type: '类型',
ModelType: '模式',
@ -587,6 +592,8 @@ export default {
'All Columns': '全表导入',
'Some Columns': '选择列',
'Branch flow': '分支流转',
'Custom Job': '自定义任务',
'Custom Script': '自定义脚本',
'Cannot select the same node for successful branch flow and failed branch flow': '成功分支流转和失败分支流转不能选择同一个节点',
'Successful branch flow and failed branch flow are required': '成功分支流转和失败分支流转必填',
'Unauthorized or deleted resources': '未授权或已删除资源',

1
pom.xml

@ -821,6 +821,7 @@
<include>**/server/worker/task/spark/SparkTaskTest.java</include>
<include>**/server/worker/task/EnvFileTest.java</include>
<include>**/server/worker/task/spark/SparkTaskTest.java</include>
<include>**/server/worker/task/sqoop/SqoopTaskTest.java</include>
<include>**/server/worker/EnvFileTest.java</include>
<include>**/service/quartz/cron/CronUtilsTest.java</include>
<include>**/service/zk/DefaultEnsembleProviderTest.java</include>

Loading…
Cancel
Save