Browse Source

[Feature][DataQuality] Refactor DataQuality Module (#5717)

Co-authored-by: sunchaohe <sunzhaohe@linklogis.com>
data_quality_design
zixi0825 3 years ago committed by GitHub
parent
commit
7fcaa047fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/DqExecuteResultService.java
  2. 15
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/DqExecuteResultServiceImpl.java
  3. 16
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/DqRuleServiceImpl.java
  4. 19
      dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/DqExecuteResultServiceTest.java
  5. 2
      dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/DqRuleServiceTest.java
  6. 33
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java
  7. 12
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/CheckType.java
  8. 12
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/DqFailureStrategy.java
  9. 3
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/OptionSourceType.java
  10. 96
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/JdbcInfo.java
  11. 4
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java
  12. 18
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/StringUtils.java
  13. 4
      dolphinscheduler-common/src/main/resources/common.properties
  14. 152
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqComparisonType.java
  15. 65
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqExecuteResult.java
  16. 10
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqExecuteResultAlertContent.java
  17. 14
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqRuleExecuteSql.java
  18. 224
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqTaskStatisticsValue.java
  19. 156
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/TaskAlertContent.java
  20. 29
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqComparisonTypeMapper.java
  21. 7
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqExecuteResultMapper.java
  22. 2
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqRuleMapper.java
  23. 29
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqTaskStatisticsValueMapper.java
  24. 22
      dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqComparisonTypeMapper.xml
  25. 15
      dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqExecuteResultMapper.xml
  26. 6
      dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqRuleMapper.xml
  27. 22
      dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqTaskStatisticsValueMapper.xml
  28. 18
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/Constants.java
  29. 52
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/DataQualityApplication.java
  30. 10
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/BaseConfig.java
  31. 94
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/Config.java
  32. 133
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/DataQualityConfiguration.java
  33. 18
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/EnvConfig.java
  34. 6
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/IConfig.java
  35. 14
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/ReaderConfig.java
  36. 32
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/TransformerConfig.java
  37. 46
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/ValidateResult.java
  38. 14
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/WriterConfig.java
  39. 67
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/ConnectorParameter.java
  40. 104
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/DataQualityConfiguration.java
  41. 77
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/ExecutorParameter.java
  42. 80
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/context/DataQualityContext.java
  43. 8
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/ReaderType.java
  44. 38
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/TransformerType.java
  45. 4
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/WriterType.java
  46. 40
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/exception/ConfigRuntimeException.java
  47. 35
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/Execution.java
  48. 132
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/SparkBatchExecution.java
  49. 72
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/SparkRuntimeEnvironment.java
  50. 56
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/Component.java
  51. 90
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/JdbcBaseConfig.java
  52. 37
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchReader.java
  53. 38
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchTransformer.java
  54. 37
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchWriter.java
  55. 68
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/HiveReader.java
  56. 95
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/JdbcReader.java
  57. 76
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/ReaderFactory.java
  58. 62
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/transformer/SqlTransformer.java
  59. 72
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/transformer/TransformerFactory.java
  60. 86
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/JdbcWriter.java
  61. 81
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/WriterFactory.java
  62. 130
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/BaseFileWriter.java
  63. 48
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/HdfsFileWriter.java
  64. 47
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/LocalFileWriter.java
  65. 73
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/ConnectorFactory.java
  66. 55
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/HiveConnector.java
  67. 61
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/JdbcConnector.java
  68. 59
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/executor/SparkSqlExecuteTask.java
  69. 68
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/writer/JdbcWriter.java
  70. 69
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/writer/WriterFactory.java
  71. 56
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/utils/ConfigUtils.java
  72. 2
      dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/utils/JsonUtils.java
  73. 24
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/SparkApplicationTestBase.java
  74. 58
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/configuration/ConfigurationParserTest.java
  75. 21
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/reader/JdbcReaderTest.java
  76. 26
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/reader/ReaderFactoryTest.java
  77. 24
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/writer/JdbcWriterTest.java
  78. 19
      dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/writer/WriterFactoryTest.java
  79. 128
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/entity/DataQualityTaskExecutionContext.java
  80. 20
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java
  81. 127
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumer.java
  82. 122
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskResponseService.java
  83. 47
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/AlertManager.java
  84. 182
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/DataQualityResultOperator.java
  85. 112
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/JdbcUrlParser.java
  86. 36
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/Md5Utils.java
  87. 330
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/RuleParserUtils.java
  88. 70
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/DataQualityTask.java
  89. 45
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/RuleManager.java
  90. 8
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/BaseConfig.java
  91. 81
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/DataQualityConfiguration.java
  92. 34
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/EnvConfig.java
  93. 68
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/ExecutorParameter.java
  94. 39
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/MultiTableAccuracyRuleParser.java
  95. 26
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/MultiTableComparisonRuleParser.java
  96. 43
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/SingleTableCustomSqlRuleParser.java
  97. 35
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/SingleTableRuleParser.java
  98. 23
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/registry/DependencyConfig.java
  99. 44
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/utils/JdbcUrlParserTest.java
  100. 23
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackServiceTestConfig.java
  101. Some files were not shown because too many files have changed in this diff. Show More

2
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/DqExecuteResultService.java

@ -26,8 +26,6 @@ import java.util.Map;
*/
public interface DqExecuteResultService {
Map<String, Object> getByTaskInstanceId(int taskInstanceId);
Map<String, Object> queryResultListPaging(User loginUser,
String searchVal,
Integer state,

15
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/DqExecuteResultServiceImpl.java

@ -23,7 +23,6 @@ import org.apache.dolphinscheduler.api.enums.Status;
import org.apache.dolphinscheduler.api.service.BaseService;
import org.apache.dolphinscheduler.api.service.DqExecuteResultService;
import org.apache.dolphinscheduler.api.utils.PageInfo;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.apache.dolphinscheduler.dao.entity.DqExecuteResult;
@ -37,7 +36,6 @@ import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
@ -50,19 +48,6 @@ public class DqExecuteResultServiceImpl extends BaseService implements DqExecute
@Autowired
private DqExecuteResultMapper dqExecuteResultMapper;
@Override
public Map<String, Object> getByTaskInstanceId(int taskInstanceId) {
    // Result map convention: payload under Constants.DATA_LIST, status fields added by putMsg.
    Map<String, Object> result = new HashMap<>(5);
    // Look up the single data-quality execute result recorded for this task instance.
    // NOTE(review): selectOne assumes at most one row per task_instance_id — confirm the
    // t_ds_dq_execute_result schema enforces that, otherwise this throws on duplicates.
    DqExecuteResult dqExecuteResult =
            dqExecuteResultMapper.selectOne(new QueryWrapper<DqExecuteResult>().eq("task_instance_id",taskInstanceId));
    // A missing row yields a null payload with SUCCESS status (no NOT_FOUND branch here).
    result.put(Constants.DATA_LIST, dqExecuteResult);
    putMsg(result, Status.SUCCESS);
    return result;
}
@Override
public Map<String, Object> queryResultListPaging(User loginUser,
String searchVal,

16
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/DqRuleServiceImpl.java

@ -40,11 +40,13 @@ import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.apache.dolphinscheduler.dao.entity.DataSource;
import org.apache.dolphinscheduler.dao.entity.DqComparisonType;
import org.apache.dolphinscheduler.dao.entity.DqRule;
import org.apache.dolphinscheduler.dao.entity.DqRuleExecuteSql;
import org.apache.dolphinscheduler.dao.entity.DqRuleInputEntry;
import org.apache.dolphinscheduler.dao.entity.User;
import org.apache.dolphinscheduler.dao.mapper.DataSourceMapper;
import org.apache.dolphinscheduler.dao.mapper.DqComparisonTypeMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleExecuteSqlMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleInputEntryMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleMapper;
@ -89,6 +91,9 @@ public class DqRuleServiceImpl extends BaseService implements DqRuleService {
@Autowired
private DataSourceMapper dataSourceMapper;
@Autowired
private DqComparisonTypeMapper dqComparisonTypeMapper;
@Override
public Map<String, Object> getRuleFormCreateJsonById(int id) {
@ -176,7 +181,6 @@ public class DqRuleServiceImpl extends BaseService implements DqRuleService {
dqRuleMapper.queryRuleListPaging(
page,
searchVal,
loginUser.getId(),
ruleType,
start,
end);
@ -271,6 +275,16 @@ public class DqRuleServiceImpl extends BaseService implements DqRuleService {
options.add(paramsOptions);
}
break;
case COMPARISON_TYPE:
options = new ArrayList<>();
ParamsOptions comparisonOptions = null;
List<DqComparisonType> list = dqComparisonTypeMapper.selectList(new QueryWrapper<DqComparisonType>().orderByAsc("id"));
for (DqComparisonType type: list) {
comparisonOptions = new ParamsOptions(type.getType(), type.getId(),false);
options.add(comparisonOptions);
}
break;
default:
break;
}

19
dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/DqExecuteResultServiceTest.java

@ -47,7 +47,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.test.context.SpringBootTest;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
@ -62,16 +61,6 @@ public class DqExecuteResultServiceTest {
@Mock
DqExecuteResultMapper dqExecuteResultMapper;
@Test
public void testGetByTaskInstanceId() {
when(dqExecuteResultMapper.selectOne(
new QueryWrapper<DqExecuteResult>().eq("task_instance_id",1)))
.thenReturn(getExecuteResult());
Map<String,Object> result = dqExecuteResultService.getByTaskInstanceId(1);
Assert.assertEquals(Status.SUCCESS,result.get(Constants.STATUS));
}
@Test
public void testQueryResultListPaging() {
@ -95,14 +84,6 @@ public class DqExecuteResultServiceTest {
Assert.assertEquals(Status.SUCCESS,result.get(Constants.STATUS));
}
public DqExecuteResult getExecuteResult() {
DqExecuteResult dqExecuteResult = new DqExecuteResult();
dqExecuteResult.setId(1);
dqExecuteResult.setState(DqTaskState.FAILURE);
return dqExecuteResult;
}
public List<DqExecuteResult> getExecuteResultList() {
List<DqExecuteResult> list = new ArrayList<>();

2
dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/DqRuleServiceTest.java

@ -130,7 +130,7 @@ public class DqRuleServiceTest {
page.setRecords(getRuleList());
when(dqRuleMapper.queryRuleListPaging(
any(IPage.class), eq(""), eq(loginUser.getId()), eq(ruleType), eq(start), eq(end))).thenReturn(page);
any(IPage.class), eq(""), eq(ruleType), eq(start), eq(end))).thenReturn(page);
when(dqRuleInputEntryMapper.getRuleInputEntryList(1)).thenReturn(getRuleInputEntryList());
when(dqRuleExecuteSqlMapper.getExecuteSqlList(1)).thenReturn(getRuleExecuteSqlList());

33
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java

@ -206,6 +206,10 @@ public final class Constants {
public static final String ZOOKEEPER_DOLPHINSCHEDULER_LOCK_FAILOVER_STARTUP_MASTERS = "/lock/failover/startup-masters";
/**
* dot .
*/
public static final String DOT = ".";
/**
* comma ,
*/
@ -221,6 +225,11 @@ public final class Constants {
*/
public static final String COLON = ":";
/**
* QUESTION ?
*/
public static final String QUESTION = "?";
/**
* SPACE " "
*/
@ -960,7 +969,9 @@ public final class Constants {
public static final String URL = "url";
public static final String DRIVER = "driver";
public static final String SQL = "sql";
public static final String INPUT_TABLE = "input_table";
public static final String OUTPUT_TABLE = "output_table";
public static final String TMP_TABLE = "tmp_table";
/**
* session timeout
@ -1046,9 +1057,14 @@ public final class Constants {
public static final String PSTREE = "pstree";
/**
* dolphinscheduler.env.path
* data.quality.jar.name
*/
public static final String DATA_QUALITY_JAR_NAME = "data-quality.jar.name";
/**
* data.quality.error.output.path
*/
public static final String DOLPHINSCHEDULER_DQ_JAR_NAME = "dq.jar.name";
public static final String DATA_QUALITY_ERROR_OUTPUT_PATH = "data-quality.error.output.path";
/**
* double brackets left
@ -1086,7 +1102,7 @@ public final class Constants {
public static final String STATISTICS_NAME = "statistics_name";
public static final String STATISTICS_EXECUTE_SQL = "statistics_execute_sql";
public static final String COMPARISON_NAME = "comparison_name";
public static final String COMPARISON_TITLE = "comparison_title";
public static final String COMPARISON_TYPE = "comparison_type";
public static final String COMPARISON_VALUE = "comparison_value";
public static final String COMPARISON_EXECUTE_SQL = "comparison_execute_sql";
public static final String MAPPING_COLUMNS = "mapping_columns";
@ -1101,8 +1117,15 @@ public final class Constants {
public static final String WRITER_CONNECTOR_TYPE = "writer_connector_type";
public static final String WRITER_DATASOURCE_ID = "writer_datasource_id";
public static final String TASK_INSTANCE_ID = "task_instance_id";
public static final String NAME = "";
public static final String UNIQUE_CODE = "unique_code";
public static final String SMALL = "small";
public static final String CHANGE = "change";
public static final String RULE_NAME = "rule_name";
public static final String DATA_TIME = "data_time";
public static final String REGEXP_PATTERN = "regexp_pattern";
public static final String ERROR_OUTPUT_PATH = "error_output_path";
public static final String INDEX = "index";
public static final String PATH = "path";
public static final String HDFS_FILE = "hdfs_file";
public static final String BATCH = "batch";
}

12
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/CheckType.java

@ -28,13 +28,15 @@ import com.fasterxml.jackson.annotation.JsonValue;
*/
public enum CheckType {
/**
* 0-statistics_compare_fixed_value
* 1-statistics_compare_comparison
* 0-comparison_minus_statistics
* 1-statistics_minus_comparison
* 2-statistics_comparison_percentage
* 3-statistics_comparison_difference_comparison_percentage
*/
STATISTICS_COMPARE_FIXED_VALUE(0,"statistics_compare_fixed_value"),
STATISTICS_COMPARE_COMPARISON(1,"statistics_compare_comparison"),
STATISTICS_COMPARISON_PERCENTAGE(2,"statistics_comparison_percentage");
COMPARISON_MINUS_STATISTICS(0,"comparison_minus_statistics"),
STATISTICS_MINUS_COMPARISON(1,"statistics_minus_comparison"),
STATISTICS_COMPARISON_PERCENTAGE(2,"statistics_comparison_percentage"),
STATISTICS_COMPARISON_DIFFERENCE_COMPARISON_PERCENTAGE(3,"statistics_comparison_difference_comparison_percentage");
CheckType(int code, String description) {
this.code = code;

12
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/DqFailureStrategy.java

@ -27,15 +27,11 @@ import com.baomidou.mybatisplus.annotation.EnumValue;
*/
public enum DqFailureStrategy {
/**
* 0-ending process when dqs tasks failed.
* 1-continue running when dqs tasks failed.
* 2-ending process and alter when dqs tasks failed.
* 3-continue running and alter when dqs tasks failed.
* 0-alert and continue when dqc tasks failed.
* 1-alert and block when dqc tasks failed.
**/
END(0, "end"),
CONTINUE(1, "continue"),
END_ALTER(2, "end_alter"),
CONTINUE_ALTER(3, "continue_alter");
ALERT(0, "alert"),
BLOCK(1, "block");
DqFailureStrategy(int code, String description) {
this.code = code;

3
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/enums/dq/OptionSourceType.java

@ -33,7 +33,8 @@ public enum OptionSourceType {
*/
DEFAULT(0,"default"),
DATASOURCE_ID(1,"datasource_id"),
DATASOURCE_TYPE(2,"datasource_type");
DATASOURCE_TYPE(2,"datasource_type"),
COMPARISON_TYPE(3,"comparison_type");
OptionSourceType(int code, String description) {
this.code = code;

96
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/JdbcInfo.java

@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.common.model;
/**
* JdbcInfo
*/
/**
 * Plain value object holding the pieces of a parsed JDBC connection url:
 * host, port, driver class name, database name, extra parameters and the
 * raw address prefix. Pure data holder — accessors simply expose the fields.
 */
public class JdbcInfo {

    /** host part of the JDBC url */
    private String host;

    /** port part of the JDBC url, kept as a string exactly as parsed */
    private String port;

    /** fully qualified JDBC driver class name */
    private String driverName;

    /** database / schema name */
    private String database;

    /** query-string style connection parameters */
    private String params;

    /** address prefix of the JDBC url (scheme + host + port) */
    private String address;

    public String getHost() {
        return host;
    }

    public void setHost(String host) {
        this.host = host;
    }

    public String getPort() {
        return port;
    }

    public void setPort(String port) {
        this.port = port;
    }

    public String getDriverName() {
        return driverName;
    }

    public void setDriverName(String driverName) {
        this.driverName = driverName;
    }

    public String getDatabase() {
        return database;
    }

    public void setDatabase(String database) {
        this.database = database;
    }

    public String getParams() {
        return params;
    }

    public void setParams(String params) {
        this.params = params;
    }

    public String getAddress() {
        return address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    @Override
    public String toString() {
        // Same rendering as field-by-field concatenation, expressed as one format string.
        return String.format(
                "JdbcInfo{host='%s', port='%s', driverName='%s', database='%s', params='%s', address='%s'}",
                host, port, driverName, database, params, address);
    }
}

4
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java

@ -42,8 +42,8 @@ public class CommonUtils {
throw new UnsupportedOperationException("Construct CommonUtils");
}
public static String getDqJarName() {
String dqsJarName = PropertyUtils.getString(Constants.DOLPHINSCHEDULER_DQ_JAR_NAME);
public static String getDataQualityJarName() {
String dqsJarName = PropertyUtils.getString(Constants.DATA_QUALITY_JAR_NAME);
if (StringUtils.isEmpty(dqsJarName)) {
return "dolphinscheduler-data-quality.jar";

18
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/StringUtils.java

@ -17,6 +17,9 @@
package org.apache.dolphinscheduler.common.utils;
import static org.apache.dolphinscheduler.common.Constants.SINGLE_QUOTES;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.exception.DolphinException;
import java.io.IOException;
@ -174,4 +177,19 @@ public class StringUtils {
private static String hex(char ch) {
return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
}
/**
 * Wraps the given value in single quotes using the statically imported
 * SINGLE_QUOTES constant from Constants, e.g. abc becomes 'abc'.
 * A null value is rendered as the literal text "null" by string concatenation.
 *
 * @param value the text to wrap
 * @return the value surrounded by single quotes
 */
public static String wrapperSingleQuotes(String value) {
    return SINGLE_QUOTES + value + SINGLE_QUOTES;
}
/**
 * Repeatedly replaces every left/right double-bracket token with its spaced
 * variant until neither token remains. Runs of overlapping brackets can
 * re-form a token after one pass, which is why the replacement loops.
 * NOTE(review): assumes Constants.DOUBLE_BRACKETS_LEFT/_RIGHT are "{{"/"}}"
 * and the *_SPACE variants insert an inner space — confirm in Constants.
 *
 * @param mainParameter text possibly containing double brackets
 * @return the text with all double-bracket tokens split apart
 */
public static String replaceDoubleBrackets(String mainParameter) {
    String result = mainParameter;
    do {
        result = result
                .replace(Constants.DOUBLE_BRACKETS_LEFT, Constants.DOUBLE_BRACKETS_LEFT_SPACE)
                .replace(Constants.DOUBLE_BRACKETS_RIGHT, Constants.DOUBLE_BRACKETS_RIGHT_SPACE);
    } while (result.contains(Constants.DOUBLE_BRACKETS_LEFT)
            || result.contains(Constants.DOUBLE_BRACKETS_RIGHT));
    return result;
}
}

4
dolphinscheduler-common/src/main/resources/common.properties

@ -73,7 +73,9 @@ kerberos.expire.time=2
datasource.encryption.enable=false
datasource.encryption.salt=!@#$%^&*
dq.jar.name=dolphinscheduler-data-quality-1.3.4-SNAPSHOT.jar
# data quality option
data-quality.jar.name=dolphinscheduler-data-quality-1.3.4-SNAPSHOT.jar
#data-quality.error.output.path=/tmp/data-quality-error-data
# Network IP gets priority, default inner outer
#dolphin.scheduler.network.priority.strategy=default

152
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqComparisonType.java

@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.entity;
import java.io.Serializable;
import java.util.Date;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.annotation.JsonFormat;
/**
 * Entity mapped to table t_ds_dq_comparison_type: one selectable comparison
 * type for a data-quality rule, together with the SQL used to compute its
 * comparison value and the table that SQL writes to.
 */
@TableName("t_ds_dq_comparison_type")
public class DqComparisonType implements Serializable {

    /** primary key */
    @TableId(value = "id", type = IdType.AUTO)
    private int id;

    /** type */
    @TableField(value = "type")
    private String type;

    /** execute sql */
    @TableField(value = "execute_sql")
    private String executeSql;

    /** output table */
    @TableField(value = "output_table")
    private String outputTable;

    /** comparison name */
    @TableField(value = "name")
    private String name;

    /** is inner source */
    @TableField(value = "is_inner_source")
    private Boolean isInnerSource;

    /** create time */
    @TableField(value = "create_time")
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
    private Date createTime;

    /** update time */
    @TableField(value = "update_time")
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
    private Date updateTime;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getExecuteSql() {
        return executeSql;
    }

    public void setExecuteSql(String executeSql) {
        this.executeSql = executeSql;
    }

    public String getOutputTable() {
        return outputTable;
    }

    public void setOutputTable(String outputTable) {
        this.outputTable = outputTable;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Boolean getInnerSource() {
        return isInnerSource;
    }

    public void setInnerSource(Boolean innerSource) {
        isInnerSource = innerSource;
    }

    public Date getCreateTime() {
        return createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }

    public Date getUpdateTime() {
        return updateTime;
    }

    public void setUpdateTime(Date updateTime) {
        this.updateTime = updateTime;
    }

    @Override
    public String toString() {
        // Same rendering as field-by-field concatenation, expressed as one format string
        // (note: isInnerSource is quoted, createTime/updateTime are not — matches the table row dump style).
        return String.format(
                "DqComparisonType{id=%d, type='%s', executeSql='%s', outputTable='%s', name='%s', isInnerSource='%s', createTime=%s, updateTime=%s}",
                id, type, executeSql, outputTable, name, isInnerSource, createTime, updateTime);
    }
}

65
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqExecuteResult.java

@ -38,7 +38,7 @@ public class DqExecuteResult implements Serializable {
@TableId(value = "id", type = IdType.AUTO)
private int id;
/**
* process_defined_id
* process defined id
*/
@TableField(value = "process_definition_id")
private long processDefinitionId;
@ -48,7 +48,7 @@ public class DqExecuteResult implements Serializable {
@TableField(exist = false)
private String processDefinitionName;
/**
* process_instance_id
* process instance id
*/
@TableField(value = "process_instance_id")
private long processInstanceId;
@ -58,7 +58,7 @@ public class DqExecuteResult implements Serializable {
@TableField(exist = false)
private String processInstanceName;
/**
* task_instance_id
* task instance id
*/
@TableField(value = "task_instance_id")
private long taskInstanceId;
@ -68,32 +68,42 @@ public class DqExecuteResult implements Serializable {
@TableField(exist = false)
private String taskName;
/**
* rule_type
* rule type
*/
@TableField(value = "rule_type")
private RuleType ruleType;
/**
* rule_name
* rule name
*/
@TableField(value = "rule_name")
private String ruleName;
/**
* statistics_value
* statistics value
*/
@TableField(value = "statistics_value")
private double statisticsValue;
/**
* comparison_value
* comparison value
*/
@TableField(value = "comparison_value")
private double comparisonValue;
/**
* check_type
* comparison type
*/
@TableField(value = "comparison_type")
private int comparisonType;
/**
* comparison type name
*/
@TableField(exist = false)
private String comparisonTypeName;
/**
* check type
*/
@TableField(value = "check_type")
private CheckType checkType;
/**
* task_instance_id
* threshold
*/
@TableField(value = "threshold")
private double threshold;
@ -103,7 +113,7 @@ public class DqExecuteResult implements Serializable {
@TableField(value = "operator")
private int operator;
/**
* operator
* failure strategy
*/
@TableField(value = "failure_strategy")
private int failureStrategy;
@ -113,7 +123,7 @@ public class DqExecuteResult implements Serializable {
@TableField(value = "user_id")
private int userId;
/**
* user_name
* user name
*/
@TableField(exist = false)
private String userName;
@ -122,6 +132,12 @@ public class DqExecuteResult implements Serializable {
*/
@TableField(value = "state")
private DqTaskState state;
/**
* error output path
*/
@TableField(value = "error_output_path")
private String errorOutputPath;
/**
* create_time
*/
@ -295,6 +311,30 @@ public class DqExecuteResult implements Serializable {
this.taskName = taskName;
}
public int getComparisonType() {
return comparisonType;
}
public void setComparisonType(int comparisonType) {
this.comparisonType = comparisonType;
}
public String getComparisonTypeName() {
return comparisonTypeName;
}
public void setComparisonTypeName(String comparisonTypeName) {
this.comparisonTypeName = comparisonTypeName;
}
public String getErrorOutputPath() {
return errorOutputPath;
}
public void setErrorOutputPath(String errorOutputPath) {
this.errorOutputPath = errorOutputPath;
}
@Override
public String toString() {
return "DqExecuteResult{"
@ -309,6 +349,8 @@ public class DqExecuteResult implements Serializable {
+ ", ruleName='" + ruleName + '\''
+ ", statisticsValue=" + statisticsValue
+ ", comparisonValue=" + comparisonValue
+ ", comparisonType=" + comparisonType
+ ", comparisonTypeName=" + comparisonTypeName
+ ", checkType=" + checkType
+ ", threshold=" + threshold
+ ", operator=" + operator
@ -316,6 +358,7 @@ public class DqExecuteResult implements Serializable {
+ ", userId=" + userId
+ ", userName='" + userName + '\''
+ ", state=" + state
+ ", errorOutputPath=" + errorOutputPath
+ ", createTime=" + createTime
+ ", updateTime=" + updateTime
+ '}';

10
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqExecuteResultAlertContent.java

@ -116,6 +116,9 @@ public class DqExecuteResultAlertContent implements Serializable {
@JsonProperty(value = "state")
private DqTaskState state;
@JsonProperty(value = "errorDataPath")
private String errorDataPath;
public DqExecuteResultAlertContent(Builder builder) {
this.processDefinitionId = builder.processDefinitionId;
this.processDefinitionName = builder.processDefinitionName;
@ -134,6 +137,7 @@ public class DqExecuteResultAlertContent implements Serializable {
this.userId = builder.userId;
this.userName = builder.userName;
this.state = builder.state;
this.errorDataPath = builder.errorDataPath;
}
public static Builder newBuilder() {
@ -158,6 +162,7 @@ public class DqExecuteResultAlertContent implements Serializable {
private int userId;
private String userName;
private DqTaskState state;
private String errorDataPath;
public Builder processDefinitionId(long processDefinitionId) {
this.processDefinitionId = processDefinitionId;
@ -244,6 +249,11 @@ public class DqExecuteResultAlertContent implements Serializable {
return this;
}
public Builder errorDataPath(String errorDataPath) {
this.errorDataPath = errorDataPath;
return this;
}
public DqExecuteResultAlertContent build() {
return new DqExecuteResultAlertContent(this);
}

14
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqRuleExecuteSql.java

@ -58,6 +58,11 @@ public class DqRuleExecuteSql implements Serializable {
*/
@TableField(value = "type")
private ExecuteSqlType type = ExecuteSqlType.MIDDLE;
/**
* is error output sql
*/
@TableField(value = "is_error_output_sql")
private boolean isErrorOutputSql;
/**
* create_time
*/
@ -111,6 +116,14 @@ public class DqRuleExecuteSql implements Serializable {
this.type = type;
}
public boolean isErrorOutputSql() {
return isErrorOutputSql;
}
public void setErrorOutputSql(boolean errorOutputSql) {
isErrorOutputSql = errorOutputSql;
}
public Date getCreateTime() {
return createTime;
}
@ -135,6 +148,7 @@ public class DqRuleExecuteSql implements Serializable {
+ ", sql='" + sql + '\''
+ ", tableAlias='" + tableAlias + '\''
+ ", type=" + type
+ ", isErrorOutputSql=" + isErrorOutputSql
+ ", createTime=" + createTime
+ ", updateTime=" + updateTime
+ '}';

224
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/DqTaskStatisticsValue.java

@ -0,0 +1,224 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.entity;
import org.apache.dolphinscheduler.common.enums.dq.RuleType;
import java.io.Serializable;
import java.util.Date;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.annotation.JsonFormat;
/**
 * Entity mapped to table t_ds_dq_task_statistics_value: one row per statistics
 * value recorded by a data-quality task execution. Fields annotated with
 * {@code @TableField(exist = false)} are derived (joined) values, not columns.
 */
@TableName("t_ds_dq_task_statistics_value")
public class DqTaskStatisticsValue implements Serializable {
/**
 * primary key
 */
@TableId(value = "id", type = IdType.AUTO)
private int id;
/**
 * process defined id
 */
@TableField(value = "process_definition_id")
private long processDefinitionId;
/**
 * process definition name (joined, not a column of this table)
 */
@TableField(exist = false)
private String processDefinitionName;
/**
 * task instance id
 */
@TableField(value = "task_instance_id")
private long taskInstanceId;
/**
 * task name (joined, not a column of this table)
 */
@TableField(exist = false)
private String taskName;
/**
 * rule id
 */
@TableField(value = "rule_id")
private long ruleId;
/**
 * rule type (joined, not a column of this table)
 */
@TableField(exist = false)
private RuleType ruleType;
/**
 * rule name (joined, not a column of this table)
 */
@TableField(exist = false)
private String ruleName;
/**
 * statistics value
 */
@TableField(value = "statistics_value")
private double statisticsValue;
/**
 * statistics name — the name under which statisticsValue was recorded
 */
@TableField(value = "statistics_name")
private String statisticsName;
/**
 * data time
 */
@TableField(value = "data_time")
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
private Date dataTime;
/**
 * create time
 */
@TableField(value = "create_time")
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
private Date createTime;
/**
 * update time
 */
@TableField(value = "update_time")
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
private Date updateTime;
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public long getProcessDefinitionId() {
return processDefinitionId;
}
public void setProcessDefinitionId(long processDefinitionId) {
this.processDefinitionId = processDefinitionId;
}
public String getProcessDefinitionName() {
return processDefinitionName;
}
public void setProcessDefinitionName(String processDefinitionName) {
this.processDefinitionName = processDefinitionName;
}
public long getTaskInstanceId() {
return taskInstanceId;
}
public void setTaskInstanceId(long taskInstanceId) {
this.taskInstanceId = taskInstanceId;
}
public String getTaskName() {
return taskName;
}
public void setTaskName(String taskName) {
this.taskName = taskName;
}
public long getRuleId() {
return ruleId;
}
public void setRuleId(long ruleId) {
this.ruleId = ruleId;
}
public RuleType getRuleType() {
return ruleType;
}
public void setRuleType(RuleType ruleType) {
this.ruleType = ruleType;
}
public String getRuleName() {
return ruleName;
}
public void setRuleName(String ruleName) {
this.ruleName = ruleName;
}
public double getStatisticsValue() {
return statisticsValue;
}
public void setStatisticsValue(double statisticsValue) {
this.statisticsValue = statisticsValue;
}
public String getStatisticsName() {
return statisticsName;
}
public void setStatisticsName(String statisticsName) {
this.statisticsName = statisticsName;
}
public Date getDataTime() {
return dataTime;
}
public void setDataTime(Date dataTime) {
this.dataTime = dataTime;
}
public Date getCreateTime() {
return createTime;
}
public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
public Date getUpdateTime() {
return updateTime;
}
public void setUpdateTime(Date updateTime) {
this.updateTime = updateTime;
}
/**
 * Debug-friendly rendering of all fields.
 */
@Override
public String toString() {
return "DqTaskStatisticsValue{"
+ "id=" + id
+ ", processDefinitionId=" + processDefinitionId
+ ", processDefinitionName='" + processDefinitionName + '\''
+ ", taskInstanceId=" + taskInstanceId
+ ", taskName='" + taskName + '\''
+ ", ruleId=" + ruleId
+ ", ruleType=" + ruleType
+ ", ruleName='" + ruleName + '\''
+ ", statisticsValue=" + statisticsValue
+ ", statisticsName='" + statisticsName + '\''
+ ", dataTime=" + dataTime
+ ", createTime=" + createTime
+ ", updateTime=" + updateTime
+ '}';
}
}

156
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/entity/TaskAlertContent.java

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.entity;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import java.io.Serializable;
import java.util.Date;
import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Immutable JSON payload describing a task-instance alert.
 *
 * <p>Instances are created only through {@link #newBuilder()}. Jackson serializes the
 * annotated fields directly (there are no getters); {@code @JsonInclude(Include.NON_NULL)}
 * omits null fields, and dates are rendered as "yyyy-MM-dd HH:mm:ss" in GMT+8.
 */
@JsonInclude(Include.NON_NULL)
public class TaskAlertContent implements Serializable {
@JsonProperty("taskInstanceId")
private int taskInstanceId;
@JsonProperty("taskName")
private String taskName;
@JsonProperty("taskType")
private String taskType;
@JsonProperty("processDefinitionId")
private int processDefinitionId;
@JsonProperty("processDefinitionName")
private String processDefinitionName;
@JsonProperty("processInstanceId")
private int processInstanceId;
@JsonProperty("processInstanceName")
private String processInstanceName;
@JsonProperty("state")
private ExecutionStatus state;
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
@JsonProperty("startTime")
private Date startTime;
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
@JsonProperty("endTime")
private Date endTime;
@JsonProperty("host")
private String host;
@JsonProperty("logPath")
private String logPath;
// private: instances are constructed only via the Builder
private TaskAlertContent(Builder builder) {
this.taskInstanceId = builder.taskInstanceId;
this.taskName = builder.taskName;
this.taskType = builder.taskType;
this.processDefinitionId = builder.processDefinitionId;
this.processDefinitionName = builder.processDefinitionName;
this.processInstanceId = builder.processInstanceId;
this.processInstanceName = builder.processInstanceName;
this.state = builder.state;
this.startTime = builder.startTime;
this.endTime = builder.endTime;
this.host = builder.host;
this.logPath = builder.logPath;
}
public static Builder newBuilder() {
return new Builder();
}
/**
 * Fluent builder for {@link TaskAlertContent}; every field is optional and
 * defaults to 0 / null.
 */
public static class Builder {
private int taskInstanceId;
private String taskName;
private String taskType;
private int processDefinitionId;
private String processDefinitionName;
private int processInstanceId;
private String processInstanceName;
private ExecutionStatus state;
private Date startTime;
private Date endTime;
private String host;
private String logPath;
public Builder taskInstanceId(int taskInstanceId) {
this.taskInstanceId = taskInstanceId;
return this;
}
public Builder taskName(String taskName) {
this.taskName = taskName;
return this;
}
public Builder taskType(String taskType) {
this.taskType = taskType;
return this;
}
public Builder processDefinitionId(int processDefinitionId) {
this.processDefinitionId = processDefinitionId;
return this;
}
public Builder processDefinitionName(String processDefinitionName) {
this.processDefinitionName = processDefinitionName;
return this;
}
public Builder processInstanceId(int processInstanceId) {
this.processInstanceId = processInstanceId;
return this;
}
public Builder processInstanceName(String processInstanceName) {
this.processInstanceName = processInstanceName;
return this;
}
public Builder state(ExecutionStatus state) {
this.state = state;
return this;
}
public Builder startTime(Date startTime) {
this.startTime = startTime;
return this;
}
public Builder endTime(Date endTime) {
this.endTime = endTime;
return this;
}
public Builder host(String host) {
this.host = host;
return this;
}
public Builder logPath(String logPath) {
this.logPath = logPath;
return this;
}
public TaskAlertContent build() {
return new TaskAlertContent(this);
}
}
}

29
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqComparisonTypeMapper.java

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.mapper;
import org.apache.dolphinscheduler.dao.entity.DqComparisonType;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
/**
 * MyBatis-Plus mapper for the {@link DqComparisonType} entity.
 * All CRUD operations are inherited from {@code BaseMapper}; no custom SQL is declared.
 */
public interface DqComparisonTypeMapper extends BaseMapper<DqComparisonType> {
}

7
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqExecuteResultMapper.java

@ -49,4 +49,11 @@ public interface DqExecuteResultMapper extends BaseMapper<DqExecuteResult> {
@Param("ruleType") int ruleType,
@Param("startTime") Date startTime,
@Param("endTime") Date endTime);
/**
* get execute result by id
* @param taskInstanceId taskInstanceId
* @return DqExecuteResult
*/
DqExecuteResult getExecuteResultById(@Param("taskInstanceId") int taskInstanceId);
}

2
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqRuleMapper.java

@ -36,14 +36,12 @@ public interface DqRuleMapper extends BaseMapper<DqRule> {
*
* @param page page
* @param searchVal searchVal
* @param userId userId
* @param ruleType ruleType
* @param startTime startTime
* @param endTime endTime
* @return dq rule page
*/
IPage<DqRule> queryRuleListPaging(IPage<DqRule> page,
@Param("searchVal") String searchVal,
@Param("userId") int userId,
@Param("ruleType") int ruleType,
@Param("startTime") Date startTime,
@Param("endTime") Date endTime);

29
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/DqTaskStatisticsValueMapper.java

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.dao.mapper;
import org.apache.dolphinscheduler.dao.entity.DqTaskStatisticsValue;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
/**
 * MyBatis-Plus mapper for the {@link DqTaskStatisticsValue} entity.
 * All CRUD operations are inherited from {@code BaseMapper}; no custom SQL is declared.
 */
public interface DqTaskStatisticsValueMapper extends BaseMapper<DqTaskStatisticsValue> {
}

22
dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqComparisonTypeMapper.xml

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="org.apache.dolphinscheduler.dao.mapper.DqComparisonTypeMapper">
    <!-- Intentionally empty: CRUD for DqComparisonType is provided by MyBatis-Plus BaseMapper.
         Add custom <select>/<insert>/... statements here if the mapper interface grows. -->
</mapper>

15
dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqExecuteResultMapper.xml

@ -20,13 +20,14 @@
<mapper namespace="org.apache.dolphinscheduler.dao.mapper.DqExecuteResultMapper">
<select id="queryResultListPaging" resultType="org.apache.dolphinscheduler.dao.entity.DqExecuteResult">
SELECT a.id, a.process_definition_id, b.name as process_definition_name, a.process_instance_id, e.name as process_instance_name,a.task_instance_id, c.name as task_name, a.rule_type, a.rule_name, a.statistics_value, a.comparison_value, a.check_type,
a.threshold,
a.threshold , cp.type as comparison_type_name,
a.operator, a.failure_strategy, a.state, a.user_id, d.user_name, a.create_time, a.update_time
FROM t_ds_dq_execute_result a
left join (select id,name from t_ds_process_definition) b on a.process_definition_id = b.id
left join (select id,name from t_ds_task_instance) c on a.task_instance_id = c.id
left join t_ds_process_instance e on a.process_instance_id = e.id
left join t_ds_user d on d.id = a.user_id
left join t_ds_dq_comparison_type cp on cp.id = a.comparison_type
<where>
<if test=" searchVal != null and searchVal != ''">
and c.name like concat('%', #{searchVal}, '%')
@ -49,4 +50,16 @@
</where>
order by a.update_time desc
</select>
<select id="getExecuteResultById" resultType="org.apache.dolphinscheduler.dao.entity.DqExecuteResult">
SELECT a.*, b.name as process_definition_name, e.name as process_instance_name, c.name as task_name,
cp.type as comparison_type_name, d.user_name
FROM t_ds_dq_execute_result a
left join (select id,name from t_ds_process_definition) b on a.process_definition_id = b.id
left join (select id,name from t_ds_task_instance) c on a.task_instance_id = c.id
left join t_ds_process_instance e on a.process_instance_id = e.id
left join t_ds_user d on d.id = a.user_id
left join t_ds_dq_comparison_type cp on cp.id = a.comparison_type
where task_instance_id = #{taskInstanceId}
</select>
</mapper>

6
dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqRuleMapper.xml

@ -20,8 +20,7 @@
<mapper namespace="org.apache.dolphinscheduler.dao.mapper.DqRuleMapper">
<select id="queryRuleListPaging" resultType="org.apache.dolphinscheduler.dao.entity.DqRule">
SELECT a.id, a.name, a.type, b.user_name, a.create_time, a.update_time
FROM t_ds_dq_rule a
left join t_ds_user b on b.id = a.user_id
FROM t_ds_dq_rule a left join t_ds_user b on a.user_id = b.id
<where>
<if test=" searchVal != null and searchVal != ''">
and a.name like concat('%', #{searchVal}, '%')
@ -29,9 +28,6 @@
<if test="startTime != null ">
and a.update_time > #{startTime} and a.update_time <![CDATA[ <=]]> #{endTime}
</if>
<if test=" userId != 1">
and a.user_id = #{userId}
</if>
<if test=" ruleType != -1">
and a.rule_type = #{ruleType}
</if>

22
dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/DqTaskStatisticsValueMapper.xml

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="org.apache.dolphinscheduler.dao.mapper.DqTaskStatisticsValueMapper">
    <!-- Intentionally empty: CRUD for DqTaskStatisticsValue is provided by MyBatis-Plus BaseMapper.
         Add custom <select>/<insert>/... statements here if the mapper interface grows. -->
</mapper>

18
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/Constants.java

@ -38,13 +38,23 @@ public final class Constants {
// JDBC connection option keys and defaults
public static final String DRIVER = "driver";
public static final String DEFAULT_DRIVER = "com.mysql.jdbc.Driver";
public static final String DEFAULT_DATABASE = "default";
// common literals
public static final String EMPTY = "";
public static final String SQL = "sql";
public static final String DOTS = ".";
// logical table-name keys used to wire reader/transformer/writer stages together
public static final String INPUT_TABLE = "input_table";
public static final String OUTPUT_TABLE = "output_table";
public static final String TMP_TABLE = "tmp_table";
// datasource option keys ("dbtable"/"jdbc" match Spark JDBC option names — NOTE(review): confirm at usage sites)
public static final String DB_TABLE = "dbtable";
public static final String JDBC = "jdbc";
public static final String SAVE_MODE = "save_mode";
public static final String APPEND = "append";
}

52
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/DataQualityApplication.java

@ -17,20 +17,13 @@
package org.apache.dolphinscheduler.data.quality;
import org.apache.dolphinscheduler.data.quality.configuration.DataQualityConfiguration;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.DataQualityConfiguration;
import org.apache.dolphinscheduler.data.quality.config.EnvConfig;
import org.apache.dolphinscheduler.data.quality.context.DataQualityContext;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.dolphinscheduler.data.quality.flow.DataQualityTask;
import org.apache.dolphinscheduler.data.quality.flow.connector.ConnectorFactory;
import org.apache.dolphinscheduler.data.quality.flow.executor.SparkSqlExecuteTask;
import org.apache.dolphinscheduler.data.quality.flow.writer.WriterFactory;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.utils.JsonUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import java.util.ArrayList;
import java.util.List;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -59,32 +52,15 @@ public class DataQualityApplication {
dataQualityConfiguration.validate();
}
SparkConf conf = new SparkConf().setAppName(dataQualityConfiguration.getName());
conf.set("spark.sql.crossJoin.enabled", "true");
SparkSession sparkSession = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
DataQualityContext context = new DataQualityContext(
sparkSession,
dataQualityConfiguration.getConnectorParameters(),
dataQualityConfiguration.getExecutorParameters(),
dataQualityConfiguration.getWriterParams());
execute(buildDataQualityFlow(context));
sparkSession.stop();
}
private static List<DataQualityTask> buildDataQualityFlow(DataQualityContext context) throws DataQualityException {
List<DataQualityTask> taskList =
new ArrayList<>(ConnectorFactory.getInstance().getConnectors(context));
taskList.add(new SparkSqlExecuteTask(context.getSparkSession(),context.getExecutorParameterList()));
taskList.addAll(WriterFactory.getInstance().getWriters(context));
return taskList;
}
private static void execute(List<DataQualityTask> taskList) {
for (DataQualityTask task: taskList) {
task.execute();
EnvConfig envConfig = dataQualityConfiguration.getEnvConfig();
Config config = new Config(envConfig.getConfig());
config.put("type",envConfig.getType());
if (StringUtils.isEmpty(config.getString("spark.app.name"))) {
config.put("spark.app.name",dataQualityConfiguration.getName());
}
SparkRuntimeEnvironment sparkRuntimeEnvironment = new SparkRuntimeEnvironment(config);
DataQualityContext dataQualityContext = new DataQualityContext(sparkRuntimeEnvironment,dataQualityConfiguration);
dataQualityContext.execute();
}
}

10
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/WriterParameter.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/BaseConfig.java

@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.configuration;
package org.apache.dolphinscheduler.data.quality.config;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
@ -25,9 +25,9 @@ import java.util.Map;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* WriterParameter
* BaseConfig
*/
public class WriterParameter implements IParameter {
public class BaseConfig implements IConfig {
@JsonProperty("type")
private String type;
@ -35,10 +35,10 @@ public class WriterParameter implements IParameter {
@JsonProperty("config")
private Map<String,Object> config;
public WriterParameter() {
public BaseConfig() {
}
public WriterParameter(String type, Map<String,Object> config) {
public BaseConfig(String type, Map<String,Object> config) {
this.type = type;
this.config = config;
}

94
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/Config.java

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.config;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* Config
*/
/**
 * Config — a thin, String-keyed configuration holder backed by a {@link Map}.
 *
 * <p>The typed getters ({@link #getInt(String)}, {@link #getLong(String)},
 * {@link #getDouble(String)}, {@link #getBoolean(String)}) render the stored value with
 * {@code String.valueOf} before parsing. For an absent key they therefore see the string
 * {@code "null"}: the numeric getters throw {@link NumberFormatException} and
 * {@code getBoolean} returns {@code false}. Callers should check {@link #has(String)}
 * first when a key may be missing. This class is not thread-safe.
 */
public class Config {

    private Map<String, Object> configuration = new HashMap<>();

    public Config() {
    }

    /**
     * @param configuration backing map; {@code null} keeps the default empty map.
     *                      The map is adopted directly (not copied), so external
     *                      mutation of it remains visible through this Config.
     */
    public Config(Map<String, Object> configuration) {
        if (configuration != null) {
            this.configuration = configuration;
        }
    }

    /**
     * @return the value rendered with {@code String.valueOf}, or {@code null} when absent
     */
    public String getString(String key) {
        // single lookup instead of the original double configuration.get(key)
        Object value = configuration.get(key);
        return value == null ? null : String.valueOf(value);
    }

    /**
     * @return the stored list, or {@code null} when absent; the value is trusted to
     *         actually be a {@code List<String>} (unchecked cast, as in callers' usage)
     */
    @SuppressWarnings("unchecked")
    public List<String> getStringList(String key) {
        return (List<String>) configuration.get(key);
    }

    /**
     * @throws NumberFormatException if the key is absent or the value is not an integer
     */
    public Integer getInt(String key) {
        return Integer.valueOf(String.valueOf(configuration.get(key)));
    }

    /**
     * @return {@code true} only when the rendered value equals "true" (case-insensitive);
     *         an absent key yields {@code false}
     */
    public Boolean getBoolean(String key) {
        return Boolean.valueOf(String.valueOf(configuration.get(key)));
    }

    /**
     * @throws NumberFormatException if the key is absent or the value is not a double
     */
    public Double getDouble(String key) {
        return Double.valueOf(String.valueOf(configuration.get(key)));
    }

    /**
     * @throws NumberFormatException if the key is absent or the value is not a long
     */
    public Long getLong(String key) {
        return Long.valueOf(String.valueOf(configuration.get(key)));
    }

    /** @return whether the key maps to a non-null value */
    public Boolean has(String key) {
        return configuration.get(key) != null;
    }

    /** @return a live view of the underlying entries */
    public Set<Entry<String, Object>> entrySet() {
        return configuration.entrySet();
    }

    public boolean isEmpty() {
        return configuration.isEmpty();
    }

    public boolean isNotEmpty() {
        return !configuration.isEmpty();
    }

    public void put(String key, Object value) {
        this.configuration.put(key, value);
    }

    /**
     * Merge the given map in; on key collision the EXISTING value wins (putIfAbsent).
     */
    public void merge(Map<String, Object> configuration) {
        configuration.forEach(this.configuration::putIfAbsent);
    }

    /** @return the live backing map (mutations write through) */
    public Map<String, Object> configurationMap() {
        return this.configuration;
    }
}

133
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/DataQualityConfiguration.java

@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.config;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* DataQualityConfiguration
*/
/**
 * Top-level definition of a data quality job: a named pipeline made of an
 * environment section plus reader, transformer and writer sections.
 * Instances are deserialized from JSON by Jackson via the annotated fields.
 */
public class DataQualityConfiguration implements IConfig {

    @JsonProperty("name")
    private String name;

    @JsonProperty("env")
    private EnvConfig envConfig;

    @JsonProperty("readers")
    private List<ReaderConfig> readerConfigs;

    @JsonProperty("transformers")
    private List<TransformerConfig> transformerConfigs;

    @JsonProperty("writers")
    private List<WriterConfig> writerConfigs;

    public DataQualityConfiguration(){}

    public DataQualityConfiguration(String name,
                                    EnvConfig envConfig,
                                    List<ReaderConfig> readerConfigs,
                                    List<WriterConfig> writerConfigs,
                                    List<TransformerConfig> transformerConfigs) {
        this.name = name;
        this.envConfig = envConfig;
        this.readerConfigs = readerConfigs;
        this.writerConfigs = writerConfigs;
        this.transformerConfigs = transformerConfigs;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public EnvConfig getEnvConfig() {
        return this.envConfig;
    }

    public void setEnvConfig(EnvConfig envConfig) {
        this.envConfig = envConfig;
    }

    public List<ReaderConfig> getReaderConfigs() {
        return this.readerConfigs;
    }

    public void setReaderConfigs(List<ReaderConfig> readerConfigs) {
        this.readerConfigs = readerConfigs;
    }

    public List<TransformerConfig> getTransformerConfigs() {
        return this.transformerConfigs;
    }

    public void setTransformerConfigs(List<TransformerConfig> transformerConfigs) {
        this.transformerConfigs = transformerConfigs;
    }

    public List<WriterConfig> getWriterConfigs() {
        return this.writerConfigs;
    }

    public void setWriterConfigs(List<WriterConfig> writerConfigs) {
        this.writerConfigs = writerConfigs;
    }

    /**
     * Fails fast when a mandatory section is missing (null), then delegates
     * validation to every child config in declaration order.
     */
    @Override
    public void validate() {
        Preconditions.checkArgument(StringUtils.isNotEmpty(name), "name should not be empty");
        Preconditions.checkArgument(envConfig != null, "env config should not be empty");
        Preconditions.checkArgument(readerConfigs != null, "reader config should not be empty");
        for (ReaderConfig reader : readerConfigs) {
            reader.validate();
        }
        Preconditions.checkArgument(transformerConfigs != null, "transform config should not be empty");
        for (TransformerConfig transformer : transformerConfigs) {
            transformer.validate();
        }
        Preconditions.checkArgument(writerConfigs != null, "writer config should not be empty");
        for (WriterConfig writer : writerConfigs) {
            writer.validate();
        }
    }

    @Override
    public String toString() {
        return "DataQualityConfiguration{"
                + "name='" + name + '\''
                + ", envConfig=" + envConfig
                + ", readerConfigs=" + readerConfigs
                + ", transformerConfigs=" + transformerConfigs
                + ", writerConfigs=" + writerConfigs
                + '}';
    }
}

18
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/DataQualityTask.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/EnvConfig.java

@ -15,16 +15,20 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow;
package org.apache.dolphinscheduler.data.quality.config;
import java.util.Map;
/**
* DataQualityTask
* EnvConfig
*/
public interface DataQualityTask {
public class EnvConfig extends BaseConfig {
public EnvConfig() {
}
/**
* execute
*/
void execute();
public EnvConfig(String type, Map<String,Object> config) {
super(type,config);
}
}

6
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/IParameter.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/IConfig.java

@ -15,12 +15,12 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.configuration;
package org.apache.dolphinscheduler.data.quality.config;
/**
* IParameter
* IConfig
*/
public interface IParameter {
public interface IConfig {
/**
* check the parameter

14
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/IConnector.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/ReaderConfig.java

@ -15,12 +15,18 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
package org.apache.dolphinscheduler.data.quality.config;
import org.apache.dolphinscheduler.data.quality.flow.DataQualityTask;
import java.util.Map;
/**
* IConnector
* ReaderConfig
*/
public interface IConnector extends DataQualityTask {
/**
 * ReaderConfig
 *
 * Config section describing one data source: a type discriminator plus a
 * free-form option map. All behavior is inherited from BaseConfig.
 */
public class ReaderConfig extends BaseConfig {

    public ReaderConfig() {}

    public ReaderConfig(String type, Map<String,Object> config) {
        super(type, config);
    }
}

32
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/TransformerConfig.java

@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.config;
import java.util.Map;
/**
* TransformerConfig
*/
/**
 * TransformerConfig
 *
 * Config section describing one transformation step: a type discriminator
 * plus a free-form option map. All behavior is inherited from BaseConfig.
 */
public class TransformerConfig extends BaseConfig {

    public TransformerConfig() {}

    public TransformerConfig(String type, Map<String,Object> config) {
        super(type, config);
    }
}

46
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/ValidateResult.java

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.config;
/**
 * Outcome of a configuration validation pass: a success flag paired with a
 * human-readable message (by convention empty on success).
 */
public class ValidateResult {

    private boolean success;

    private String msg;

    public ValidateResult(boolean success, String msg) {
        this.success = success;
        this.msg = msg;
    }

    public String getMsg() {
        return this.msg;
    }

    public void setMsg(String msg) {
        this.msg = msg;
    }

    public boolean isSuccess() {
        return this.success;
    }

    public void setSuccess(boolean success) {
        this.success = success;
    }
}

14
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/writer/IWriter.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/config/WriterConfig.java

@ -15,12 +15,18 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.writer;
package org.apache.dolphinscheduler.data.quality.config;
import org.apache.dolphinscheduler.data.quality.flow.DataQualityTask;
import java.util.Map;
/**
* IWriter
* WriterConfig
*/
public interface IWriter extends DataQualityTask {
/**
 * WriterConfig
 *
 * Config section describing one output sink: a type discriminator plus a
 * free-form option map. All behavior is inherited from BaseConfig.
 */
public class WriterConfig extends BaseConfig {

    public WriterConfig() {}

    public WriterConfig(String type, Map<String,Object> config) {
        super(type, config);
    }
}

67
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/ConnectorParameter.java

@ -1,67 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.configuration;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* ConnectorParameter
*/
/**
 * Legacy config section for one connector: a type discriminator plus a
 * free-form config map, deserialized from JSON by Jackson.
 */
public class ConnectorParameter implements IParameter {

    @JsonProperty("type")
    private String type;

    @JsonProperty("config")
    private Map<String,Object> config;

    public ConnectorParameter(){
    }

    public ConnectorParameter(String type, Map<String,Object> config) {
        this.type = type;
        this.config = config;
    }

    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public Map<String, Object> getConfig() {
        return this.config;
    }

    public void setConfig(Map<String, Object> config) {
        this.config = config;
    }

    /** Rejects a blank type or a missing config map. */
    @Override
    public void validate() {
        Preconditions.checkArgument(StringUtils.isNotEmpty(type), "type should not be empty");
        Preconditions.checkArgument(config != null, "config should not be empty");
    }
}

104
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/DataQualityConfiguration.java

@ -1,104 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.configuration;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* DataQualityConfiguration
*/
/**
 * Legacy top-level job definition: a named pipeline made of connector,
 * writer and executor sections, deserialized from JSON by Jackson.
 */
public class DataQualityConfiguration implements IParameter {

    @JsonProperty("name")
    private String name;

    @JsonProperty("connectors")
    private List<ConnectorParameter> connectorParameters;

    @JsonProperty("writers")
    private List<WriterParameter> writerParams;

    @JsonProperty("executors")
    private List<ExecutorParameter> executorParameters;

    public DataQualityConfiguration(){}

    public DataQualityConfiguration(String name,
                                    List<ConnectorParameter> connectorParameters,
                                    List<WriterParameter> writerParams,
                                    List<ExecutorParameter> executorParameters) {
        this.name = name;
        this.connectorParameters = connectorParameters;
        this.writerParams = writerParams;
        this.executorParameters = executorParameters;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public List<ConnectorParameter> getConnectorParameters() {
        return this.connectorParameters;
    }

    public void setConnectorParameters(List<ConnectorParameter> connectorParameters) {
        this.connectorParameters = connectorParameters;
    }

    public List<WriterParameter> getWriterParams() {
        return this.writerParams;
    }

    public void setWriterParams(List<WriterParameter> writerParams) {
        this.writerParams = writerParams;
    }

    public List<ExecutorParameter> getExecutorParameters() {
        return this.executorParameters;
    }

    public void setExecutorParameters(List<ExecutorParameter> executorParameters) {
        this.executorParameters = executorParameters;
    }

    /**
     * Fails fast when a mandatory section is missing (null), then delegates
     * validation to every child parameter in declaration order.
     */
    @Override
    public void validate() {
        Preconditions.checkArgument(StringUtils.isNotEmpty(name), "name should not be empty");
        Preconditions.checkArgument(connectorParameters != null, "connector param should not be empty");
        for (ConnectorParameter connector : connectorParameters) {
            connector.validate();
        }
        Preconditions.checkArgument(writerParams != null, "writer param should not be empty");
        for (WriterParameter writer : writerParams) {
            writer.validate();
        }
        Preconditions.checkArgument(executorParameters != null, "executor param should not be empty");
        for (ExecutorParameter executor : executorParameters) {
            executor.validate();
        }
    }
}

77
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/configuration/ExecutorParameter.java

@ -1,77 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.configuration;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* ExecutorParameter
*/
/**
 * Legacy config section for one SQL executor step: an index, the SQL to
 * run, and the alias under which its result table is registered.
 * Deserialized from JSON by Jackson.
 */
public class ExecutorParameter implements IParameter {

    @JsonProperty("index")
    private String index;

    @JsonProperty("execute.sql")
    private String executeSql;

    @JsonProperty("table.alias")
    private String tableAlias;

    public ExecutorParameter() {
    }

    public ExecutorParameter(String index, String executeSql, String tableAlias) {
        this.index = index;
        this.executeSql = executeSql;
        this.tableAlias = tableAlias;
    }

    public String getIndex() {
        return index;
    }

    public void setIndex(String index) {
        this.index = index;
    }

    public String getExecuteSql() {
        return executeSql;
    }

    public void setExecuteSql(String executeSql) {
        this.executeSql = executeSql;
    }

    public String getTableAlias() {
        return tableAlias;
    }

    public void setTableAlias(String tableAlias) {
        this.tableAlias = tableAlias;
    }

    /**
     * Rejects missing OR empty fields. The previous checks tested only for
     * null while the messages promised "should not be empty"; this matches
     * the messages (and ConnectorParameter, which uses StringUtils.isNotEmpty).
     */
    @Override
    public void validate() {
        Preconditions.checkArgument(index != null && !index.isEmpty(), "index should not be empty");
        Preconditions.checkArgument(executeSql != null && !executeSql.isEmpty(), "executeSql should not be empty");
        Preconditions.checkArgument(tableAlias != null && !tableAlias.isEmpty(), "tableAlias should not be empty");
    }
}

80
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/context/DataQualityContext.java

@ -17,11 +17,15 @@
package org.apache.dolphinscheduler.data.quality.context;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.dolphinscheduler.data.quality.configuration.ExecutorParameter;
import org.apache.dolphinscheduler.data.quality.configuration.WriterParameter;
import org.apache.spark.sql.SparkSession;
import org.apache.dolphinscheduler.data.quality.config.DataQualityConfiguration;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchTransformer;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.dolphinscheduler.data.quality.flow.batch.reader.ReaderFactory;
import org.apache.dolphinscheduler.data.quality.flow.batch.transformer.TransformerFactory;
import org.apache.dolphinscheduler.data.quality.flow.batch.writer.WriterFactory;
import java.util.List;
@ -30,56 +34,34 @@ import java.util.List;
*/
public class DataQualityContext {
private SparkSession sparkSession;
private List<ConnectorParameter> connectorParameterList;
private List<ExecutorParameter> executorParameterList;
private SparkRuntimeEnvironment sparkRuntimeEnvironment;
private List<WriterParameter> writerParamList;
private DataQualityConfiguration dataQualityConfiguration;
public DataQualityContext() {
}
public DataQualityContext(SparkSession sparkSession,
List<ConnectorParameter> connectorParameterList,
List<ExecutorParameter> executorParameterList,
List<WriterParameter> writerParamList) {
this.sparkSession = sparkSession;
this.connectorParameterList = connectorParameterList;
this.executorParameterList = executorParameterList;
this.writerParamList = writerParamList;
}
public SparkSession getSparkSession() {
return sparkSession;
}
public void setSparkSession(SparkSession sparkSession) {
this.sparkSession = sparkSession;
}
public List<ConnectorParameter> getConnectorParameterList() {
return connectorParameterList;
}
public void setConnectorParameterList(List<ConnectorParameter> connectorParameterList) {
this.connectorParameterList = connectorParameterList;
}
public List<ExecutorParameter> getExecutorParameterList() {
return executorParameterList;
}
public void setExecutorParameterList(List<ExecutorParameter> executorParameterList) {
this.executorParameterList = executorParameterList;
}
public List<WriterParameter> getWriterParamList() {
return writerParamList;
public DataQualityContext(SparkRuntimeEnvironment sparkRuntimeEnvironment,
DataQualityConfiguration dataQualityConfiguration) {
this.sparkRuntimeEnvironment = sparkRuntimeEnvironment;
this.dataQualityConfiguration = dataQualityConfiguration;
}
public void setWriterParamList(List<WriterParameter> writerParamList) {
this.writerParamList = writerParamList;
public void execute() throws DataQualityException {
List<BatchReader> readers = ReaderFactory
.getInstance()
.getReaders(this.sparkRuntimeEnvironment,dataQualityConfiguration.getReaderConfigs());
List<BatchTransformer> transformers = TransformerFactory
.getInstance()
.getTransformer(this.sparkRuntimeEnvironment,dataQualityConfiguration.getTransformerConfigs());
List<BatchWriter> writers = WriterFactory
.getInstance()
.getWriters(this.sparkRuntimeEnvironment,dataQualityConfiguration.getWriterConfigs());
if (sparkRuntimeEnvironment.isBatch()) {
sparkRuntimeEnvironment.getBatchExecution().execute(readers,transformers,writers);
} else {
throw new DataQualityException("stream mode is not supported now");
}
}
}

8
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/ConnectorType.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/ReaderType.java

@ -18,9 +18,9 @@
package org.apache.dolphinscheduler.data.quality.enums;
/**
* ConnectorType
* ReaderType
*/
public enum ConnectorType {
public enum ReaderType {
/**
* JDBC
* HIVE
@ -28,8 +28,8 @@ public enum ConnectorType {
JDBC,
HIVE;
public static ConnectorType getType(String name) {
for (ConnectorType type: ConnectorType.values()) {
public static ReaderType getType(String name) {
for (ReaderType type: ReaderType.values()) {
if (type.name().equalsIgnoreCase(name)) {
return type;
}

38
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/TransformerType.java

@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.enums;
/**
* TransformerType
*/
/**
 * TransformerType
 *
 * Supported transformer kinds; currently only SQL transformations exist.
 */
public enum TransformerType {
    /**
     * SQL
     */
    SQL;

    /**
     * Case-insensitive lookup by name; returns null when no constant matches.
     */
    public static TransformerType getType(String name) {
        for (TransformerType candidate : values()) {
            if (candidate.name().equalsIgnoreCase(name)) {
                return candidate;
            }
        }
        return null;
    }
}

4
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/enums/WriterType.java

@ -24,7 +24,9 @@ public enum WriterType {
/**
* JDBC
*/
JDBC;
JDBC,
LOCAL_FILE,
HDFS_FILE;
public static WriterType getType(String name) {
for (WriterType type: WriterType.values()) {

40
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/exception/ConfigRuntimeException.java

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.exception;
/**
* ConfigRuntimeException
*/
/**
 * Unchecked exception raised for invalid or inconsistent data quality
 * configuration. Mirrors RuntimeException's standard constructor set.
 */
public class ConfigRuntimeException extends RuntimeException {

    // RuntimeException is Serializable; an explicit ID keeps the serialized
    // form stable across compiler/JVM changes instead of the fragile default.
    private static final long serialVersionUID = 1L;

    /** Creates an exception with neither message nor cause. */
    public ConfigRuntimeException() {
        super();
    }

    /** @param message detail message describing the configuration problem */
    public ConfigRuntimeException(String message) {
        super(message);
    }

    /**
     * @param message detail message describing the configuration problem
     * @param cause underlying failure, preserved for the stack trace
     */
    public ConfigRuntimeException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying failure; the message defaults to cause.toString() */
    public ConfigRuntimeException(Throwable cause) {
        super(cause);
    }
}

35
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/Execution.java

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.execution;
import org.apache.dolphinscheduler.data.quality.flow.Component;
import java.util.List;
/**
* Execution
*/
/**
 * Execution
 *
 * Contract for an engine that drives a reader -> transformer -> writer
 * pipeline. The type parameters pin which Component flavours a concrete
 * engine accepts (e.g. the batch implementations).
 */
public interface Execution<R extends Component, T extends Component, W extends Component> {

    /**
     * Runs the pipeline: consume from every reader, apply the transformers,
     * then emit through every writer.
     *
     * @param readers readers that produce the input datasets
     * @param transformers transformers applied in list order
     * @param writers writers that receive the final dataset
     */
    void execute(List<R> readers, List<T> transformers, List<W> writers);
}

132
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/SparkBatchExecution.java

@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.execution;
import static org.apache.dolphinscheduler.data.quality.Constants.INPUT_TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.OUTPUT_TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.TMP_TABLE;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.exception.ConfigRuntimeException;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchTransformer;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.List;
/**
* SparkBatchExecution
*/
/**
 * SparkBatchExecution
 *
 * Batch implementation of Execution: registers every reader's dataset as a
 * Spark temp view, threads a dataset through the transformers in order, and
 * hands the final dataset to each writer. Left byte-identical in logic:
 * behavior depends on the exact order of temp-view registration and reads.
 */
public class SparkBatchExecution implements Execution<BatchReader, BatchTransformer, BatchWriter> {

    private final SparkRuntimeEnvironment environment;

    public SparkBatchExecution(SparkRuntimeEnvironment environment) throws ConfigRuntimeException {
        this.environment = environment;
    }

    /**
     * Runs the whole batch pipeline and stops the Spark session afterwards.
     */
    @Override
    public void execute(List<BatchReader> readers, List<BatchTransformer> transformers, List<BatchWriter> writers) {
        // Publish every reader's dataset as a temp view first so SQL
        // transformers can reference any source table by name.
        readers.forEach(reader -> registerInputTempView(reader, environment));
        if (!readers.isEmpty()) {
            // NOTE(review): the first reader's read() is invoked again here even
            // though registerInputTempView already called it — confirm readers
            // are cheap/idempotent to read twice.
            Dataset<Row> ds = readers.get(0).read(environment);
            for (BatchTransformer tf:transformers) {
                // Each transformer may replace the flowing dataset and may also
                // publish its output as a temp view for later stages.
                ds = executeTransformer(environment, tf, ds);
                registerTransformTempView(tf, ds);
            }
            for (BatchWriter sink: writers) {
                executeWriter(environment, sink, ds);
            }
        }
        // The shared session is stopped unconditionally once the run finishes.
        environment.sparkSession().stop();
    }

    /** Registers ds under tableName, rejecting a null dataset. */
    private void registerTempView(String tableName, Dataset<Row> ds) {
        if (ds != null) {
            ds.createOrReplaceTempView(tableName);
        } else {
            throw new ConfigRuntimeException("dataset is null, can not createOrReplaceTempView");
        }
    }

    /**
     * Reads the reader's dataset and publishes it under the reader's
     * mandatory "output_table" config entry.
     */
    private void registerInputTempView(BatchReader reader, SparkRuntimeEnvironment environment) {
        Config conf = reader.getConfig();
        if (Boolean.TRUE.equals(conf.has(OUTPUT_TABLE))) {
            String tableName = conf.getString(OUTPUT_TABLE);
            registerTempView(tableName, reader.read(environment));
        } else {
            throw new ConfigRuntimeException(
                    "[" + reader.getClass().getName() + "] must be registered as dataset, please set \"output_table\" config");
        }
    }

    /**
     * Resolves the transformer's input: the union of its comma-separated
     * "input_table" views when present, otherwise the dataset flowing from
     * the previous stage; optionally publishes it as "tmp_table" before
     * delegating to the transformer itself.
     */
    private Dataset<Row> executeTransformer(SparkRuntimeEnvironment environment, BatchTransformer transformer, Dataset<Row> dataset) {
        Config config = transformer.getConfig();
        Dataset<Row> inputDataset;
        Dataset<Row> outputDataset = null;
        if (Boolean.TRUE.equals(config.has(INPUT_TABLE))) {
            String[] tableNames = config.getString(INPUT_TABLE).split(",");
            for (String sourceTableName: tableNames) {
                inputDataset = environment.sparkSession().read().table(sourceTableName);
                if (outputDataset == null) {
                    outputDataset = inputDataset;
                } else {
                    // Multiple input tables are unioned into one dataset.
                    outputDataset = outputDataset.union(inputDataset);
                }
            }
        } else {
            outputDataset = dataset;
        }
        if (Boolean.TRUE.equals(config.has(TMP_TABLE))) {
            if (outputDataset == null) {
                outputDataset = dataset;
            }
            String tableName = config.getString(TMP_TABLE);
            registerTempView(tableName, outputDataset);
        }
        return transformer.transform(outputDataset, environment);
    }

    /** Publishes the transformer's result under its optional "output_table". */
    private void registerTransformTempView(BatchTransformer transformer, Dataset<Row> ds) {
        Config config = transformer.getConfig();
        if (Boolean.TRUE.equals(config.has(OUTPUT_TABLE))) {
            String tableName = config.getString(OUTPUT_TABLE);
            registerTempView(tableName, ds);
        }
    }

    /**
     * Writes either the named "input_table" view (when configured) or the
     * dataset flowing from the last transformer.
     */
    private void executeWriter(SparkRuntimeEnvironment environment, BatchWriter writer, Dataset<Row> ds) {
        Config config = writer.getConfig();
        Dataset<Row> inputDataSet = ds;
        if (Boolean.TRUE.equals(config.has(INPUT_TABLE))) {
            String sourceTableName = config.getString(INPUT_TABLE);
            inputDataSet = environment.sparkSession().read().table(sourceTableName);
        }
        writer.write(inputDataSet, environment);
    }
}

72
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/execution/SparkRuntimeEnvironment.java

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.execution;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
/**
* SparkRuntimeEnvironment
*/
/**
 * Thin wrapper around a SparkSession built from a user-supplied Config:
 * every config entry is copied onto the SparkConf before the session is
 * created (or reused via getOrCreate).
 */
public class SparkRuntimeEnvironment {

    private static final String TYPE = "type";
    private static final String BATCH = "batch";

    private SparkSession sparkSession;

    private Config config = new Config();

    public SparkRuntimeEnvironment(Config config) {
        // A null config falls back to the empty default created above.
        if (config != null) {
            this.config = config;
        }
        this.prepare();
    }

    public Config getConfig() {
        return this.config;
    }

    /** Builds (or reuses) the session from the assembled SparkConf. */
    public void prepare() {
        sparkSession = SparkSession.builder().config(createSparkConf()).getOrCreate();
    }

    private SparkConf createSparkConf() {
        SparkConf sparkConf = new SparkConf();
        this.config.entrySet().forEach(
                entry -> sparkConf.set(entry.getKey(), String.valueOf(entry.getValue())));
        sparkConf.set("spark.sql.crossJoin.enabled", "true");
        return sparkConf;
    }

    public SparkSession sparkSession() {
        return this.sparkSession;
    }

    /** True when the "type" config entry is "batch" (case-insensitive). */
    public boolean isBatch() {
        return BATCH.equalsIgnoreCase(this.config.getString(TYPE));
    }

    public SparkBatchExecution getBatchExecution() {
        return new SparkBatchExecution(this);
    }
}

56
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/Component.java

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Component: the common contract shared by readers, transformers and writers
 * of the data-quality Spark flow. Every component exposes its raw Config,
 * can validate that config, and is given a chance to prepare itself against
 * the Spark runtime before execution.
 */
public interface Component {

    Config getConfig();

    ValidateResult validateConfig();

    /**
     * Checks that every required option key is present in this component's config.
     *
     * @param requiredOptions keys that must exist in the config
     * @return a successful result when all keys exist, otherwise a failure whose
     *         message lists each missing key wrapped in brackets
     */
    default ValidateResult validate(List<String> requiredOptions) {
        List<String> missing = requiredOptions.stream()
                .filter(option -> Boolean.FALSE.equals(getConfig().has(option)))
                .collect(Collectors.toList());

        if (missing.isEmpty()) {
            return new ValidateResult(true, "");
        }

        String joined = missing.stream()
                .map(option -> "[" + option + "]")
                .collect(Collectors.joining(","));
        return new ValidateResult(false, joined + " is not exist");
    }

    void prepare(SparkRuntimeEnvironment prepareEnv);
}

90
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/JdbcBaseConfig.java

@ -1,90 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow;
import static org.apache.dolphinscheduler.data.quality.Constants.DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.DEFAULT_DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.DEFAULT_DRIVER;
import static org.apache.dolphinscheduler.data.quality.Constants.DRIVER;
import static org.apache.dolphinscheduler.data.quality.Constants.EMPTY;
import static org.apache.dolphinscheduler.data.quality.Constants.PASSWORD;
import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.Constants;
import java.util.Map;
/**
 * JdbcBaseConfig: immutable value holder for the JDBC connection options of
 * a connector, populated from a raw config map with sensible defaults.
 */
public class JdbcBaseConfig {

    private final String database;
    private final String table;
    private final String dbTable;
    private final String url;
    private final String user;
    private final String password;
    private final String driver;

    public JdbcBaseConfig(Map<String,Object> config) {
        this.database = stringValue(config, DATABASE, DEFAULT_DATABASE);
        this.table = stringValue(config, TABLE, EMPTY);
        // qualified "database.table" name used by the Spark JDBC reader
        this.dbTable = this.database + Constants.DOTS + this.table;
        this.url = stringValue(config, URL, EMPTY);
        this.user = stringValue(config, USER, EMPTY);
        this.password = stringValue(config, PASSWORD, EMPTY);
        this.driver = stringValue(config, DRIVER, DEFAULT_DRIVER);
    }

    /** Reads {@code key} from the map as a string, falling back to {@code fallback}. */
    private static String stringValue(Map<String,Object> config, String key, Object fallback) {
        return String.valueOf(config.getOrDefault(key, fallback));
    }

    public String getDatabase() {
        return database;
    }

    public String getTable() {
        return table;
    }

    public String getDbTable() {
        return dbTable;
    }

    public String getUrl() {
        return url;
    }

    public String getUser() {
        return user;
    }

    public String getPassword() {
        return password;
    }

    public String getDriver() {
        return driver;
    }
}

37
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchReader.java

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.Component;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
/**
 * BatchReader: source side of the batch data-quality flow; produces the
 * dataset that transformers and writers operate on.
 */
public interface BatchReader extends Component {
/**
 * Reads data from the configured source.
 *
 * @param env the Spark runtime environment providing the session
 * @return the dataset read from the source
 */
Dataset<Row> read(SparkRuntimeEnvironment env);
}

38
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchTransformer.java

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.Component;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
/**
 * BatchTransformer: intermediate stage of the batch data-quality flow;
 * converts one dataset into another (e.g. by executing rule SQL).
 */
public interface BatchTransformer extends Component {
/**
 * Transforms the given dataset.
 *
 * @param data the input dataset
 * @param env the Spark runtime environment providing the session
 * @return the transformed dataset
 */
Dataset<Row> transform(Dataset<Row> data, SparkRuntimeEnvironment env);
}

37
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/BatchWriter.java

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.Component;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
/**
 * BatchWriter: sink side of the batch data-quality flow; persists the final
 * dataset to the target storage (JDBC table, local or HDFS files, ...).
 */
public interface BatchWriter extends Component {
/**
 * Writes the dataset to the target storage.
 *
 * @param data the dataset to persist
 * @param environment the Spark runtime environment providing the session
 */
void write(Dataset<Row> data, SparkRuntimeEnvironment environment);
}

68
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/HiveReader.java

@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.reader;
import static org.apache.dolphinscheduler.data.quality.Constants.DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.SQL;
import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Arrays;
/**
 * HiveReader: reads a dataset from a Hive table through Spark SQL. When the
 * caller supplies no explicit SQL, a "select * from database.table" query is
 * generated during {@link #prepare}.
 */
public class HiveReader implements BatchReader {

    private final Config config;

    public HiveReader(Config config) {
        this.config = config;
    }

    @Override
    public Config getConfig() {
        return config;
    }

    @Override
    public ValidateResult validateConfig() {
        // database and table are needed to build the default full-table query
        return validate(Arrays.asList(DATABASE, TABLE));
    }

    @Override
    public void prepare(SparkRuntimeEnvironment prepareEnv) {
        if (StringUtils.isEmpty(config.getString(SQL))) {
            // fall back to a full-table scan when no SQL is configured
            String fullScan = "select * from " + config.getString(DATABASE) + "." + config.getString(TABLE);
            config.put(SQL, fullScan);
        }
    }

    @Override
    public Dataset<Row> read(SparkRuntimeEnvironment env) {
        return env.sparkSession().sql(config.getString(SQL));
    }
}

95
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/JdbcReader.java

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.reader;
import static org.apache.dolphinscheduler.data.quality.Constants.DB_TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.DOTS;
import static org.apache.dolphinscheduler.data.quality.Constants.DRIVER;
import static org.apache.dolphinscheduler.data.quality.Constants.JDBC;
import static org.apache.dolphinscheduler.data.quality.Constants.PASSWORD;
import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import org.apache.dolphinscheduler.data.quality.utils.ConfigUtils;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
/**
 * JdbcReader: reads a dataset from a relational database through Spark's
 * JDBC data source. Required options: url, table, user, password. Extra
 * options may be supplied under the "jdbc." config prefix and are passed
 * through to the Spark reader verbatim.
 */
public class JdbcReader implements BatchReader {

    private final Config config;

    public JdbcReader(Config config) {
        this.config = config;
    }

    @Override
    public Config getConfig() {
        return config;
    }

    @Override
    public ValidateResult validateConfig() {
        return validate(Arrays.asList(URL, TABLE, USER, PASSWORD));
    }

    @Override
    public void prepare(SparkRuntimeEnvironment prepareEnv) {
        // Do nothing
    }

    @Override
    public Dataset<Row> read(SparkRuntimeEnvironment env) {
        return jdbcReader(env.sparkSession()).load();
    }

    /**
     * Builds a DataFrameReader configured with the JDBC connection options
     * plus any pass-through options found under the "jdbc." config prefix.
     */
    private DataFrameReader jdbcReader(SparkSession sparkSession) {
        DataFrameReader reader = sparkSession.read()
                .format(JDBC)
                .option(URL, config.getString(URL))
                .option(DB_TABLE, config.getString(TABLE))
                .option(USER, config.getString(USER))
                .option(PASSWORD, config.getString(PASSWORD))
                .option(DRIVER, config.getString(DRIVER));

        Config jdbcConfig = ConfigUtils.extractSubConfig(config, JDBC + DOTS, false);
        // BUGFIX: test the extracted sub-config, not the full config — the full
        // config is never empty here, so the old check always entered the branch;
        // the intent is to apply extra options only when "jdbc."-prefixed entries exist.
        if (!jdbcConfig.isEmpty()) {
            Map<String,String> optionMap = new HashMap<>(16);
            jdbcConfig.entrySet().forEach(entry -> optionMap.put(entry.getKey(), String.valueOf(entry.getValue())));
            reader.options(optionMap);
        }

        return reader;
    }
}

76
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/reader/ReaderFactory.java

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.reader;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ReaderConfig;
import org.apache.dolphinscheduler.data.quality.enums.ReaderType;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import java.util.ArrayList;
import java.util.List;
/**
 * ReaderFactory: creates BatchReader instances from reader configurations.
 */
public class ReaderFactory {

    /** Lazy-holder idiom: instance created on first access, thread-safe via class init. */
    private static class Singleton {
        static ReaderFactory instance = new ReaderFactory();
    }

    public static ReaderFactory getInstance() {
        return Singleton.instance;
    }

    /**
     * Instantiates, validates and prepares one reader per configuration entry.
     *
     * @param sparkRuntimeEnvironment environment passed to each reader's prepare hook
     * @param readerConfigs reader configurations to materialize
     * @return the prepared readers, in configuration order
     * @throws DataQualityException when a configured reader type is not supported
     */
    public List<BatchReader> getReaders(SparkRuntimeEnvironment sparkRuntimeEnvironment, List<ReaderConfig> readerConfigs) throws DataQualityException {
        List<BatchReader> readers = new ArrayList<>();
        for (ReaderConfig readerConfig : readerConfigs) {
            BatchReader batchReader = getReader(readerConfig);
            if (batchReader == null) {
                continue;
            }
            // NOTE(review): the validation result is computed but never inspected —
            // confirm whether a failed validation should abort the flow.
            batchReader.validateConfig();
            batchReader.prepare(sparkRuntimeEnvironment);
            readers.add(batchReader);
        }
        return readers;
    }

    /** Maps a reader config onto a concrete reader, or null when the type is unknown. */
    private BatchReader getReader(ReaderConfig readerConfig) throws DataQualityException {
        ReaderType readerType = ReaderType.getType(readerConfig.getType());
        Config config = new Config(readerConfig.getConfig());
        if (readerType == null) {
            return null;
        }
        switch (readerType) {
            case JDBC:
                return new JdbcReader(config);
            case HIVE:
                return new HiveReader(config);
            default:
                throw new DataQualityException("reader type " + readerType + " is not supported!");
        }
    }
}

62
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/transformer/SqlTransformer.java

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.transformer;
import static org.apache.dolphinscheduler.data.quality.Constants.SQL;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchTransformer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Collections;
/**
 * SqlTransformer: runs the configured SQL statement against the Spark
 * session and returns its result as the transformed dataset.
 */
public class SqlTransformer implements BatchTransformer {

    private final Config config;

    public SqlTransformer(Config config) {
        this.config = config;
    }

    @Override
    public Config getConfig() {
        return config;
    }

    @Override
    public ValidateResult validateConfig() {
        // the SQL statement is the only mandatory option
        return validate(Collections.singletonList(SQL));
    }

    @Override
    public void prepare(SparkRuntimeEnvironment prepareEnv) {
        // Do nothing
    }

    @Override
    public Dataset<Row> transform(Dataset<Row> data, SparkRuntimeEnvironment env) {
        // NOTE(review): the incoming dataset is not referenced directly; the SQL is
        // presumably executed against temp views registered earlier in the flow — confirm.
        return env.sparkSession().sql(config.getString(SQL));
    }
}

72
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/transformer/TransformerFactory.java

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.transformer;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.TransformerConfig;
import org.apache.dolphinscheduler.data.quality.enums.TransformerType;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchTransformer;
import java.util.ArrayList;
import java.util.List;
/**
 * TransformerFactory: creates BatchTransformer instances from transformer
 * configurations. (Previous javadoc said "WriterFactory" — copy-paste slip.)
 */
public class TransformerFactory {
// lazy-holder idiom: instance created on first access, thread-safe via class init
private static class Singleton {
static TransformerFactory instance = new TransformerFactory();
}
public static TransformerFactory getInstance() {
return Singleton.instance;
}
/**
 * Instantiates, validates and prepares one transformer per configuration entry.
 *
 * @param sparkRuntimeEnvironment environment passed to each transformer's prepare hook
 * @param transformerConfigs transformer configurations to materialize
 * @return the prepared transformers, in configuration order
 * @throws DataQualityException when a configured transformer type is not supported
 */
public List<BatchTransformer> getTransformer(SparkRuntimeEnvironment sparkRuntimeEnvironment, List<TransformerConfig> transformerConfigs) throws DataQualityException {
List<BatchTransformer> transformers = new ArrayList<>();
for (TransformerConfig transformerConfig:transformerConfigs) {
BatchTransformer transformer = getTransformer(transformerConfig);
if (transformer != null) {
// NOTE(review): validation result is computed but never inspected — confirm intent
transformer.validateConfig();
transformer.prepare(sparkRuntimeEnvironment);
transformers.add(transformer);
}
}
return transformers;
}
// maps a transformer config onto a concrete transformer, or null when the type is unknown
private BatchTransformer getTransformer(TransformerConfig transformerConfig) throws DataQualityException {
TransformerType transformerType = TransformerType.getType(transformerConfig.getType());
Config config = new Config(transformerConfig.getConfig());
if (transformerType != null) {
if (transformerType == TransformerType.SQL) {
return new SqlTransformer(config);
}
throw new DataQualityException("transformer type " + transformerType + " is not supported!");
}
return null;
}
}

86
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/JdbcWriter.java

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.writer;
import static org.apache.dolphinscheduler.data.quality.Constants.APPEND;
import static org.apache.dolphinscheduler.data.quality.Constants.DB_TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.DRIVER;
import static org.apache.dolphinscheduler.data.quality.Constants.JDBC;
import static org.apache.dolphinscheduler.data.quality.Constants.PASSWORD;
import static org.apache.dolphinscheduler.data.quality.Constants.SAVE_MODE;
import static org.apache.dolphinscheduler.data.quality.Constants.SQL;
import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Arrays;
/**
 * JdbcWriter: persists a dataset into a relational database through Spark's
 * JDBC data source. Required options: url, table, user, password. An
 * optional SQL statement, when present, replaces the incoming dataset.
 */
public class JdbcWriter implements BatchWriter {

    private final Config config;

    public JdbcWriter(Config config) {
        this.config = config;
    }

    @Override
    public Config getConfig() {
        return config;
    }

    @Override
    public ValidateResult validateConfig() {
        return validate(Arrays.asList(URL, TABLE, USER, PASSWORD));
    }

    @Override
    public void prepare(SparkRuntimeEnvironment prepareEnv) {
        // default to append mode when the caller did not choose a save mode
        if (StringUtils.isEmpty(config.getString(SAVE_MODE))) {
            config.put(SAVE_MODE,APPEND);
        }
    }

    @Override
    public void write(Dataset<Row> data, SparkRuntimeEnvironment env) {
        // an explicitly configured SQL statement takes precedence over the input dataset
        if (!StringUtils.isBlank(config.getString(SQL))) {
            data = env.sparkSession().sql(config.getString(SQL));
        }
        data.write()
                .format(JDBC)
                .option(DRIVER,config.getString(DRIVER))
                .option(URL,config.getString(URL))
                .option(DB_TABLE, config.getString(TABLE))
                .option(USER, config.getString(USER))
                .option(PASSWORD, config.getString(PASSWORD))
                .mode(config.getString(SAVE_MODE))
                .save();
    }
}

81
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/WriterFactory.java

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.writer;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.WriterConfig;
import org.apache.dolphinscheduler.data.quality.enums.WriterType;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.dolphinscheduler.data.quality.flow.batch.writer.file.HdfsFileWriter;
import org.apache.dolphinscheduler.data.quality.flow.batch.writer.file.LocalFileWriter;
import java.util.ArrayList;
import java.util.List;
/**
 * WriterFactory: creates BatchWriter instances from writer configurations.
 */
public class WriterFactory {

    /** Lazy-holder idiom: instance created on first access, thread-safe via class init. */
    private static class Singleton {
        static WriterFactory instance = new WriterFactory();
    }

    public static WriterFactory getInstance() {
        return Singleton.instance;
    }

    /**
     * Instantiates, validates and prepares one writer per configuration entry.
     *
     * @param sparkRuntimeEnvironment environment passed to each writer's prepare hook
     * @param writerConfigs writer configurations to materialize
     * @return the prepared writers, in configuration order
     * @throws DataQualityException when a configured writer type is not supported
     */
    public List<BatchWriter> getWriters(SparkRuntimeEnvironment sparkRuntimeEnvironment, List<WriterConfig> writerConfigs) throws DataQualityException {
        List<BatchWriter> writers = new ArrayList<>();
        for (WriterConfig writerConfig : writerConfigs) {
            BatchWriter batchWriter = getWriter(writerConfig);
            if (batchWriter == null) {
                continue;
            }
            // NOTE(review): the validation result is computed but never inspected —
            // confirm whether a failed validation should abort the flow.
            batchWriter.validateConfig();
            batchWriter.prepare(sparkRuntimeEnvironment);
            writers.add(batchWriter);
        }
        return writers;
    }

    /** Maps a writer config onto a concrete writer, or null when the type is unknown. */
    private BatchWriter getWriter(WriterConfig writerConfig) throws DataQualityException {
        WriterType writerType = WriterType.getType(writerConfig.getType());
        Config config = new Config(writerConfig.getConfig());
        if (writerType == null) {
            return null;
        }
        switch (writerType) {
            case JDBC:
                return new JdbcWriter(config);
            case LOCAL_FILE:
                return new LocalFileWriter(config);
            case HDFS_FILE:
                return new HdfsFileWriter(config);
            default:
                throw new DataQualityException("writer type " + writerType + " is not supported!");
        }
    }
}

130
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/BaseFileWriter.java

@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.writer.file;
import static org.apache.dolphinscheduler.data.quality.Constants.SAVE_MODE;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.dolphinscheduler.data.quality.utils.ConfigUtils;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * BaseFileWriter: shared implementation for file-based writers. Subclasses
 * supply the default URI schema (e.g. "hdfs://", "file://"); this class
 * handles config defaults, path validation and the actual Spark write.
 */
public abstract class BaseFileWriter implements BatchWriter {

    public static final String PARTITION_BY = "partition_by";
    public static final String SERIALIZER = "serializer";
    public static final String PATH = "path";

    private final Config config;

    protected BaseFileWriter(Config config) {
        this.config = config;
    }

    @Override
    public Config getConfig() {
        return config;
    }

    @Override
    public void prepare(SparkRuntimeEnvironment prepareEnv) {
        // defaults: no partitioning, fail when the target exists, JSON output
        Map<String,Object> defaultConfig = new HashMap<>();
        defaultConfig.put(PARTITION_BY, Collections.emptyList());
        defaultConfig.put(SAVE_MODE,"error");
        defaultConfig.put(SERIALIZER,"json");
        config.merge(defaultConfig);
    }

    /**
     * Validates that [path] is a non-empty string and is either absolute
     * ("/...") or prefixed with one of the allowed URI schemas.
     *
     * @param allowedUri schemas this writer accepts (e.g. "hdfs://")
     * @return success, or a failure describing the path problem
     */
    protected ValidateResult checkConfigImpl(List<String> allowedUri) {
        if (Boolean.TRUE.equals(config.has(PATH)) && StringUtils.isNotEmpty(config.getString(PATH))) {
            String dir = config.getString(PATH);
            if (dir.startsWith("/") || uriInAllowedSchema(dir, allowedUri)) {
                return new ValidateResult(true, "");
            } else {
                return new ValidateResult(false, "invalid path URI, please set the following allowed schemas: " + String.join(",", allowedUri));
            }
        } else {
            return new ValidateResult(false, "please specify [path] as non-empty string");
        }
    }

    /**
     * Returns true when the URI starts with ANY of the allowed schemas.
     * BUGFIX: the previous reduce(true, &&) over startsWith required the URI
     * to match EVERY schema (and accepted any URI for an empty list); anyMatch
     * implements the intended "one of the allowed schemas" semantics.
     */
    protected boolean uriInAllowedSchema(String uri, List<String> allowedUri) {
        return allowedUri.stream().anyMatch(uri::startsWith);
    }

    /** Prefixes schema-less absolute paths with the writer's default URI schema. */
    protected String buildPathWithDefaultSchema(String uri, String defaultUriSchema) {
        return uri.startsWith("/") ? defaultUriSchema + uri : uri;
    }

    /**
     * Writes the dataset to the configured path using the configured save
     * mode, optional partition columns, pass-through "options.*" entries and
     * serializer (csv/json/parquet/text/orc; unknown serializers write nothing).
     */
    protected void outputImpl(Dataset<Row> df, String defaultUriSchema) {
        DataFrameWriter<Row> writer = df.write().mode(config.getString(SAVE_MODE));

        if (CollectionUtils.isNotEmpty(config.getStringList(PARTITION_BY))) {
            List<String> partitionKeys = config.getStringList(PARTITION_BY);
            writer.partitionBy(partitionKeys.toArray(new String[]{}));
        }

        Config fileConfig = ConfigUtils.extractSubConfig(config, "options.", false);
        if (fileConfig.isNotEmpty()) {
            Map<String,String> optionMap = new HashMap<>(16);
            fileConfig.entrySet().forEach(x -> optionMap.put(x.getKey(),String.valueOf(x.getValue())));
            writer.options(optionMap);
        }

        String path = buildPathWithDefaultSchema(config.getString(PATH), defaultUriSchema);

        switch (config.getString(SERIALIZER)) {
            case "csv":
                writer.csv(path);
                break;
            case "json":
                writer.json(path);
                break;
            case "parquet":
                writer.parquet(path);
                break;
            case "text":
                writer.text(path);
                break;
            case "orc":
                writer.orc(path);
                break;
            default:
                break;
        }
    }
}

48
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/ConnectorParameter.java → dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/HdfsFileWriter.java

@ -15,45 +15,33 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter;
package org.apache.dolphinscheduler.data.quality.flow.batch.writer.file;
import java.util.Map;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Collections;
/**
* ConnectorParameter
* HdfsFileWriter
*/
public class ConnectorParameter {
@JsonProperty("type")
private String type;
@JsonProperty("config")
private Map<String,Object> config;
public ConnectorParameter(){
}
public class HdfsFileWriter extends BaseFileWriter {
public ConnectorParameter(String type, Map<String,Object> config) {
this.type = type;
this.config = config;
public HdfsFileWriter(Config config) {
super(config);
}
public String getType() {
return type;
@Override
public void write(Dataset<Row> data, SparkRuntimeEnvironment environment) {
outputImpl(data,"hdfs://");
}
public void setType(String type) {
this.type = type;
@Override
public ValidateResult validateConfig() {
return checkConfigImpl(Collections.singletonList("hdfs://"));
}
public Map<String, Object> getConfig() {
return config;
}
public void setConfig(Map<String, Object> config) {
this.config = config;
}
}

47
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/batch/writer/file/LocalFileWriter.java

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.batch.writer.file;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.config.ValidateResult;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import java.util.Collections;
/**
* LocalFileWriter
*/
/**
 * LocalFileWriter
 *
 * Writes a dataset to the local filesystem using the "file://" URI schema.
 */
public class LocalFileWriter extends BaseFileWriter {

    /** URI schema used both as the output prefix and as the only allowed schema. */
    private static final String FILE_SCHEMA = "file://";

    public LocalFileWriter(Config config) {
        super(config);
    }

    @Override
    public void write(Dataset<Row> data, SparkRuntimeEnvironment environment) {
        outputImpl(data, FILE_SCHEMA);
    }

    @Override
    public ValidateResult validateConfig() {
        return checkConfigImpl(Collections.singletonList(FILE_SCHEMA));
    }
}

73
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/ConnectorFactory.java

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.dolphinscheduler.data.quality.context.DataQualityContext;
import org.apache.dolphinscheduler.data.quality.enums.ConnectorType;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.spark.sql.SparkSession;
import java.util.ArrayList;
import java.util.List;
/**
* ConnectorFactory
*/
/**
 * ConnectorFactory
 *
 * Builds {@link IConnector} instances for every connector parameter of a
 * {@link DataQualityContext}. Implemented as a lazily-initialized singleton
 * (initialization-on-demand holder idiom).
 */
public class ConnectorFactory {

    private static class Singleton {
        static ConnectorFactory instance = new ConnectorFactory();
    }

    public static ConnectorFactory getInstance() {
        return Singleton.instance;
    }

    /**
     * Create one connector per configured connector parameter.
     *
     * @param context context holding the Spark session and the connector parameter list
     * @return connectors for all recognized parameter types (unrecognized types yield null and are skipped)
     * @throws DataQualityException if a connector type is recognized but not supported
     */
    public List<IConnector> getConnectors(DataQualityContext context) throws DataQualityException {
        List<IConnector> connectorList = new ArrayList<>();
        for (ConnectorParameter connectorParameter : context.getConnectorParameterList()) {
            IConnector connector = getConnector(context.getSparkSession(), connectorParameter);
            if (connector != null) {
                connectorList.add(connector);
            }
        }
        return connectorList;
    }

    private IConnector getConnector(SparkSession sparkSession, ConnectorParameter connectorParameter) throws DataQualityException {
        ConnectorType connectorType = ConnectorType.getType(connectorParameter.getType());
        if (connectorType != null) {
            switch (connectorType) {
                case HIVE:
                    return new HiveConnector(sparkSession, connectorParameter);
                case JDBC:
                    return new JdbcConnector(sparkSession, connectorParameter);
                default:
                    // Fix: the original message contained the literal "${connectorType}" —
                    // Java does not interpolate strings, so include the actual value.
                    throw new DataQualityException("connector type " + connectorType + " is not supported!");
            }
        }
        return null;
    }
}

55
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/HiveConnector.java

@ -1,55 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
import static org.apache.dolphinscheduler.data.quality.Constants.DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.DEFAULT_DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.EMPTY;
import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import org.apache.dolphinscheduler.data.quality.Constants;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.spark.sql.SparkSession;
import java.util.Map;
/**
* HiveConnector
*/
/**
 * HiveConnector
 *
 * Registers a Hive table as a Spark temporary view so later SQL steps can query it
 * by its bare table name.
 */
public class HiveConnector implements IConnector {

    private final SparkSession sparkSession;

    private final ConnectorParameter connectorParameter;

    public HiveConnector(SparkSession sparkSession, ConnectorParameter connectorParameter) {
        this.sparkSession = sparkSession;
        this.connectorParameter = connectorParameter;
    }

    @Override
    public void execute() {
        Map<String, Object> params = connectorParameter.getConfig();
        // Fall back to the default database / empty table name when not configured.
        String databaseName = String.valueOf(params.getOrDefault(DATABASE, DEFAULT_DATABASE));
        String tableName = String.valueOf(params.getOrDefault(TABLE, EMPTY));
        String qualifiedName = databaseName + Constants.DOTS + tableName;
        sparkSession.table(qualifiedName).createOrReplaceTempView(tableName);
    }
}

61
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/connector/JdbcConnector.java

@ -1,61 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.dolphinscheduler.data.quality.flow.JdbcBaseConfig;
import org.apache.dolphinscheduler.data.quality.utils.JdbcUtils;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.spark.sql.SparkSession;
import java.util.Map;
/**
* JdbcConnector
*/
/**
 * JdbcConnector
 *
 * Reads a table through Spark's JDBC data source and registers the result as a
 * temporary view named after the source table.
 */
public class JdbcConnector implements IConnector {

    private final SparkSession sparkSession;

    private final ConnectorParameter connectorParameter;

    public JdbcConnector(SparkSession sparkSession, ConnectorParameter connectorParameter) {
        this.sparkSession = sparkSession;
        this.connectorParameter = connectorParameter;
    }

    @Override
    public void execute() {
        Map<String, Object> config = connectorParameter.getConfig();
        JdbcBaseConfig jdbcBaseConfig = new JdbcBaseConfig(config);
        // Fix: the original message contained the literal "$driver" — Java does not
        // interpolate strings, so report the actual driver class name.
        Preconditions.checkArgument(JdbcUtils.isJdbcDriverLoaded(jdbcBaseConfig.getDriver()),
                "JDBC driver " + jdbcBaseConfig.getDriver() + " not present in classpath");
        sparkSession
                .read()
                .format("jdbc")
                .option("driver", jdbcBaseConfig.getDriver())
                .option("url", jdbcBaseConfig.getUrl())
                .option("dbtable", jdbcBaseConfig.getDbTable())
                .option("user", jdbcBaseConfig.getUser())
                .option("password", jdbcBaseConfig.getPassword())
                .load().createOrReplaceTempView(jdbcBaseConfig.getTable());
    }
}

59
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/executor/SparkSqlExecuteTask.java

@ -1,59 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.executor;
import org.apache.dolphinscheduler.data.quality.configuration.ExecutorParameter;
import org.apache.dolphinscheduler.data.quality.flow.DataQualityTask;
import org.apache.dolphinscheduler.data.quality.utils.StringUtils;
import org.apache.spark.sql.SparkSession;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* SparkSqlExecuteTask
*/
/**
 * SparkSqlExecuteTask
 *
 * Runs each configured SQL statement on the Spark session and registers the
 * result under its configured table alias; entries without an alias are
 * logged as errors and skipped.
 */
public class SparkSqlExecuteTask implements DataQualityTask {

    private static final Logger logger = LoggerFactory.getLogger(SparkSqlExecuteTask.class);

    private final SparkSession sparkSession;

    private final List<ExecutorParameter> executorParameterList;

    public SparkSqlExecuteTask(SparkSession sparkSession, List<ExecutorParameter> executorParameterList) {
        this.sparkSession = sparkSession;
        this.executorParameterList = executorParameterList;
    }

    @Override
    public void execute() {
        for (ExecutorParameter parameter : executorParameterList) {
            String alias = parameter.getTableAlias();
            // Results can only be registered when an alias was supplied.
            if (!StringUtils.isNotEmpty(alias)) {
                logger.error("lost table alias");
                continue;
            }
            sparkSession
                    .sql(parameter.getExecuteSql())
                    .createOrReplaceTempView(alias);
        }
    }
}

68
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/writer/JdbcWriter.java

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.writer;
import static org.apache.dolphinscheduler.data.quality.Constants.EMPTY;
import static org.apache.dolphinscheduler.data.quality.Constants.SQL;
import org.apache.dolphinscheduler.data.quality.configuration.WriterParameter;
import org.apache.dolphinscheduler.data.quality.flow.JdbcBaseConfig;
import org.apache.dolphinscheduler.data.quality.utils.JdbcUtils;
import org.apache.dolphinscheduler.data.quality.utils.Preconditions;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import java.util.Map;
/**
* JdbcWriter
*/
/**
 * JdbcWriter
 *
 * Executes the configured SQL on the Spark session and appends the result to a
 * JDBC target table.
 */
public class JdbcWriter implements IWriter {

    private final SparkSession sparkSession;

    private final WriterParameter writerParam;

    public JdbcWriter(SparkSession sparkSession, WriterParameter writerParam) {
        this.sparkSession = sparkSession;
        this.writerParam = writerParam;
    }

    @Override
    public void execute() {
        Map<String, Object> config = writerParam.getConfig();
        JdbcBaseConfig jdbcBaseConfig = new JdbcBaseConfig(config);
        String sql = String.valueOf(config.getOrDefault(SQL, EMPTY));
        // Fix: the original message contained the literal "$driver" — Java does not
        // interpolate strings, so report the actual driver class name.
        Preconditions.checkArgument(JdbcUtils.isJdbcDriverLoaded(jdbcBaseConfig.getDriver()),
                "JDBC driver " + jdbcBaseConfig.getDriver() + " not present in classpath");
        sparkSession.sql(sql)
                .write()
                .format("jdbc")
                .option("driver", jdbcBaseConfig.getDriver())
                .option("url", jdbcBaseConfig.getUrl())
                .option("dbtable", jdbcBaseConfig.getTable())
                .option("user", jdbcBaseConfig.getUser())
                .option("password", jdbcBaseConfig.getPassword())
                .mode(SaveMode.Append)
                .save();
    }
}

69
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/flow/writer/WriterFactory.java

@ -1,69 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.writer;
import org.apache.dolphinscheduler.data.quality.configuration.WriterParameter;
import org.apache.dolphinscheduler.data.quality.context.DataQualityContext;
import org.apache.dolphinscheduler.data.quality.enums.WriterType;
import org.apache.dolphinscheduler.data.quality.exception.DataQualityException;
import org.apache.spark.sql.SparkSession;
import java.util.ArrayList;
import java.util.List;
/**
* WriterFactory
*/
/**
 * WriterFactory
 *
 * Builds {@link IWriter} instances for every writer parameter of a
 * {@link DataQualityContext}. Implemented as a lazily-initialized singleton
 * (initialization-on-demand holder idiom).
 */
public class WriterFactory {

    private static class Singleton {
        static WriterFactory instance = new WriterFactory();
    }

    public static WriterFactory getInstance() {
        return Singleton.instance;
    }

    /**
     * Create one writer per configured writer parameter.
     *
     * @param context context holding the Spark session and the writer parameter list
     * @return writers for all recognized parameter types (unrecognized types yield null and are skipped)
     * @throws DataQualityException if a writer type is recognized but not supported
     */
    public List<IWriter> getWriters(DataQualityContext context) throws DataQualityException {
        List<IWriter> writerList = new ArrayList<>();
        for (WriterParameter writerParam : context.getWriterParamList()) {
            IWriter writer = getWriter(context.getSparkSession(), writerParam);
            if (writer != null) {
                writerList.add(writer);
            }
        }
        return writerList;
    }

    private IWriter getWriter(SparkSession sparkSession, WriterParameter writerParam) throws DataQualityException {
        WriterType writerType = WriterType.getType(writerParam.getType());
        if (writerType != null) {
            if (writerType == WriterType.JDBC) {
                return new JdbcWriter(sparkSession, writerParam);
            }
            // Fix: the original message said literal "$readerType" in the WRITER factory —
            // wrong variable name and never interpolated; report the actual writer type.
            throw new DataQualityException("writer type " + writerType + " is not supported!");
        }
        return null;
    }
}

56
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/utils/ConfigUtils.java

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.utils;
import org.apache.dolphinscheduler.data.quality.config.Config;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Utility methods for working with {@link Config} instances.
 */
public class ConfigUtils {

    private ConfigUtils() {
        // Utility class: never instantiated.
        throw new IllegalStateException("Construct ConfigUtils");
    }

    /**
     * Extract sub config with fixed prefix
     *
     * @param source config source
     * @param prefix config prefix
     * @param keepPrefix true if keep prefix
     * @return a new config holding only the entries whose key starts with the prefix,
     *         preserving insertion order; values are normalized to their string form
     */
    public static Config extractSubConfig(Config source, String prefix, boolean keepPrefix) {
        Map<String, Object> extracted = new LinkedHashMap<>();
        for (Map.Entry<String, Object> entry : source.entrySet()) {
            String fullKey = entry.getKey();
            if (!fullKey.startsWith(prefix)) {
                continue;
            }
            String targetKey = keepPrefix ? fullKey : fullKey.substring(prefix.length());
            extracted.put(targetKey, String.valueOf(entry.getValue()));
        }
        return new Config(extracted);
    }
}

2
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/utils/JsonUtils.java

@ -21,6 +21,7 @@ import static com.fasterxml.jackson.databind.DeserializationFeature.ACCEPT_EMPTY
import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;
import static com.fasterxml.jackson.databind.DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL;
import static com.fasterxml.jackson.databind.MapperFeature.REQUIRE_SETTERS_FOR_GETTERS;
import static com.fasterxml.jackson.databind.SerializationFeature.FAIL_ON_EMPTY_BEANS;
import java.util.TimeZone;
@ -44,6 +45,7 @@ public class JsonUtils {
.configure(ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true)
.configure(READ_UNKNOWN_ENUM_VALUES_AS_NULL, true)
.configure(REQUIRE_SETTERS_FOR_GETTERS, true)
.configure(FAIL_ON_EMPTY_BEANS,false)
.setTimeZone(TimeZone.getDefault());
private JsonUtils() {

24
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/SparkApplicationTestBase.java

@ -17,8 +17,11 @@
package org.apache.dolphinscheduler.data.quality;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.execution.SparkRuntimeEnvironment;
import java.util.HashMap;
import java.util.Map;
import org.junit.Before;
@ -27,16 +30,17 @@ import org.junit.Before;
*/
public class SparkApplicationTestBase {
protected SparkSession sparkSession;
protected SparkRuntimeEnvironment sparkRuntimeEnvironment;
@Before
public void before() {
SparkConf conf = new SparkConf().setAppName("data quality test");
conf.set("spark.sql.crossJoin.enabled", "true");
conf.set("spark.driver.bindAddress","127.0.0.1");
sparkSession = SparkSession.builder()
.master("local[4]")
.config(conf)
.getOrCreate();
Map<String,Object> config = new HashMap<>();
config.put("spark.app.name","data quality test");
config.put("spark.sql.crossJoin.enabled","true");
config.put("spark.driver.bindAddress","127.0.0.1");
config.put("spark.ui.port",13000);
config.put("spark.master","local[4]");
sparkRuntimeEnvironment = new SparkRuntimeEnvironment(new Config(config));
}
}

58
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/configuration/ConfigurationParserTest.java

@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.data.quality.configuration;
import org.apache.dolphinscheduler.data.quality.config.DataQualityConfiguration;
import org.apache.dolphinscheduler.data.quality.utils.JsonUtils;
import org.junit.Assert;
@ -35,49 +36,20 @@ public class ConfigurationParserTest {
private int verifyConfigurationValidate() {
int flag = 1;
try {
String parameterStr = "{\n"
+ "\t\"name\": \"\\u81EA\\u5B9A\\u4E49SQL\",\n"
+ "\t\"connectors\": [{\n"
+ "\t\t\"type\": \"JDBC\",\n"
+ "\t\t\"config\": {\n"
+ "\t\t\t\"database\": \"test\",\n"
+ "\t\t\t\"password\": \"123456\",\n"
+ "\t\t\t\"driver\": \"com.mysql.jdbc.Driver\",\n"
+ "\t\t\t\"user\": \"test\",\n"
+ "\t\t\t\"table\": \"test1\",\n"
+ "\t\t\t\"url\": \"jdbc:mysql://localhost:3306/test\"\n"
+ "\t\t}\n"
+ "\t}],\n"
+ "\t\"writers\": [{\n"
+ "\t\t\"type\": \"JDBC\",\n"
+ "\t\t\"config\": {\n"
+ "\t\t\t\"database\": \"dolphinscheduler\",\n"
+ "\t\t\t\"password\": \"Test@123!\",\n"
+ "\t\t\t\"driver\": \"com.mysql.jdbc.Driver\",\n"
+ "\t\t\t\"user\": \"test\",\n"
+ "\t\t\t\"table\": \"t_ds_dqs_result\",\n"
+ "\t\t\t\"url\": \"jdbc:mysql://localhost:3306/dolphinscheduler?characterEncoding=UTF-8&allowMultiQueries=true\",\n"
+ "\t\t\t\"sql\": \"SELECT 1 as rule_type,"
+ "'\\u81EA\\u5B9A\\u4E49SQL' as rule_name,"
+ "18 as process_definition_id,"
+ "64 as process_instance_id,"
+ "70 as task_instance_id,"
+ "mySum AS statistics_value, "
+ "total_count.total AS comparison_value,"
+ "0 as check_type,"
+ "6 as threshold, "
+ "0 as operator, "
+ "0 as failure_strategy, "
+ "'2021-01-31 15:00:07' as create_time,"
+ "'2021-01-31 15:00:07' as update_time from ( select sum(c4) as mySum from test1 ) tmp1 join total_count\"\n"
+ "\t\t}\n"
+ "\t}],\n"
+ "\t\"executors\": [{\n"
+ "\t\t\"index\": \"1\",\n"
+ "\t\t\"execute.sql\": \"SELECT COUNT(*) AS total FROM test1 WHERE (c3 != '55')\",\n"
+ "\t\t\"table.alias\": \"total_count\"\n"
+ "\t}]\n"
+ "}";
String parameterStr = "{\"name\":\"data quality test\",\"env\":{\"type\":\"batch\",\"config\":null},"
+ "\"readers\":[{\"type\":\"JDBC\",\"config\":{\"database\":\"test\",\"password\":\"Test@123!\","
+ "\"driver\":\"com.mysql.jdbc.Driver\",\"user\":\"test\",\"output_table\":\"test1\",\"table\":\"test1\","
+ "\"url\":\"jdbc:mysql://172.16.100.199:3306/test\"} }],\"transformers\":[{\"type\":\"sql\",\"config\":"
+ "{\"index\":1,\"output_table\":\"miss_count\",\"sql\":\"SELECT COUNT(*) AS miss FROM test1 WHERE (c1 is null or c1 = '') \"} },"
+ "{\"type\":\"sql\",\"config\":{\"index\":2,\"output_table\":\"total_count\",\"sql\":\"SELECT COUNT(*) AS total FROM test1 \"} }],"
+ "\"writers\":[{\"type\":\"JDBC\",\"config\":{\"database\":\"dolphinscheduler\",\"password\":\"test\","
+ "\"driver\":\"org.postgresql.Driver\",\"user\":\"test\",\"table\":\"t_ds_dq_execute_result\","
+ "\"url\":\"jdbc:postgresql://172.16.100.199:5432/dolphinscheduler?stringtype=unspecified\","
+ "\"sql\":\"SELECT 0 as rule_type,'data quality test' as rule_name,7 as process_definition_id,80 as process_instance_id,"
+ "80 as task_instance_id,miss_count.miss AS statistics_value, total_count.total AS comparison_value,2 as check_type,10 as"
+ " threshold, 3 as operator, 0 as failure_strategy, '2021-06-29 10:18:59' as create_time,'2021-06-29 10:18:59' as update_time "
+ "from miss_count FULL JOIN total_count\"} }]}";
DataQualityConfiguration dataQualityConfiguration = JsonUtils.fromJson(parameterStr,DataQualityConfiguration.class);
dataQualityConfiguration.validate();
} catch (Exception e) {

21
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/connector/JdbcConnectorTest.java → dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/reader/JdbcReaderTest.java

@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
package org.apache.dolphinscheduler.data.quality.flow.reader;
import static org.apache.dolphinscheduler.data.quality.Constants.DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.DRIVER;
@ -24,20 +24,22 @@ import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.flow.FlowTestBase;
import org.apache.dolphinscheduler.data.quality.flow.batch.reader.JdbcReader;
import java.sql.Connection;
import java.util.HashMap;
import java.util.Map;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
* JdbcConnectorTest
*/
public class JdbcConnectorTest extends FlowTestBase {
public class JdbcReaderTest extends FlowTestBase {
@Before
public void before() {
@ -47,22 +49,19 @@ public class JdbcConnectorTest extends FlowTestBase {
@Test
public void testJdbcConnectorExecute() {
JdbcConnector jdbcConnector = new JdbcConnector(sparkSession,buildConnectorParameter());
jdbcConnector.execute();
JdbcReader jdbcReader = new JdbcReader(buildReaderConfig());
Assert.assertNotNull(jdbcReader.read(sparkRuntimeEnvironment));
}
private ConnectorParameter buildConnectorParameter() {
ConnectorParameter connectorParameter = new ConnectorParameter();
connectorParameter.setType("JDBC");
private Config buildReaderConfig() {
Map<String,Object> config = new HashMap<>();
config.put(DATABASE,"test");
config.put(TABLE,"test1");
config.put(TABLE,"test.test1");
config.put(URL,url);
config.put(USER,"test");
config.put(PASSWORD,"123456");
config.put(DRIVER,driver);
connectorParameter.setConfig(config);
return connectorParameter;
return new Config(config);
}
private void createConnectorTable() {

26
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/connector/ConnectorFactoryTest.java → dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/reader/ReaderFactoryTest.java

@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.flow.connector;
package org.apache.dolphinscheduler.data.quality.flow.reader;
import static org.apache.dolphinscheduler.data.quality.Constants.DATABASE;
import static org.apache.dolphinscheduler.data.quality.Constants.DRIVER;
@ -24,8 +24,9 @@ import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.configuration.ConnectorParameter;
import org.apache.dolphinscheduler.data.quality.context.DataQualityContext;
import org.apache.dolphinscheduler.data.quality.config.ReaderConfig;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchReader;
import org.apache.dolphinscheduler.data.quality.flow.batch.reader.ReaderFactory;
import java.util.ArrayList;
import java.util.HashMap;
@ -38,15 +39,14 @@ import org.junit.Test;
/**
* ConnectorFactoryTest
*/
public class ConnectorFactoryTest {
public class ReaderFactoryTest {
@Test
public void testConnectorGenerate() {
DataQualityContext context = new DataQualityContext();
List<ConnectorParameter> connectorParameters = new ArrayList<>();
ConnectorParameter connectorParameter = new ConnectorParameter();
connectorParameter.setType("JDBC");
List<ReaderConfig> readerConfigs = new ArrayList<>();
ReaderConfig readerConfig = new ReaderConfig();
readerConfig.setType("JDBC");
Map<String,Object> config = new HashMap<>();
config.put(DATABASE,"test");
config.put(TABLE,"test1");
@ -54,15 +54,13 @@ public class ConnectorFactoryTest {
config.put(USER,"test");
config.put(PASSWORD,"123456");
config.put(DRIVER,"com.mysql.jdbc.Driver");
connectorParameter.setConfig(config);
connectorParameter.setConfig(null);
connectorParameters.add(connectorParameter);
context.setConnectorParameterList(connectorParameters);
readerConfig.setConfig(config);
readerConfigs.add(readerConfig);
int flag = 0;
try {
List<IConnector> connectors = ConnectorFactory.getInstance().getConnectors(context);
if (connectors != null && connectors.size() >= 1) {
List<BatchReader> readers = ReaderFactory.getInstance().getReaders(null,readerConfigs);
if (readers != null && readers.size() >= 1) {
flag = 1;
}
} catch (Exception e) {

24
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/writer/JdbcWriterTest.java

@ -24,8 +24,10 @@ import static org.apache.dolphinscheduler.data.quality.Constants.TABLE;
import static org.apache.dolphinscheduler.data.quality.Constants.URL;
import static org.apache.dolphinscheduler.data.quality.Constants.USER;
import org.apache.dolphinscheduler.data.quality.configuration.WriterParameter;
import org.apache.dolphinscheduler.data.quality.config.Config;
import org.apache.dolphinscheduler.data.quality.flow.FlowTestBase;
import org.apache.dolphinscheduler.data.quality.flow.batch.reader.JdbcReader;
import org.apache.dolphinscheduler.data.quality.flow.batch.writer.JdbcWriter;
import java.sql.Connection;
import java.util.HashMap;
@ -47,23 +49,27 @@ public class JdbcWriterTest extends FlowTestBase {
@Test
public void testJdbcWriterExecute() {
JdbcWriter jdbcWriter = new JdbcWriter(sparkSession,buildWriterParameter());
jdbcWriter.execute();
JdbcReader jdbcConnector = new JdbcReader(buildJdbcReaderConfig());
JdbcWriter jdbcWriter = new JdbcWriter(buildJdbcConfig());
jdbcWriter.write(jdbcConnector.read(sparkRuntimeEnvironment),sparkRuntimeEnvironment);
}
private WriterParameter buildWriterParameter() {
WriterParameter writerParameter = new WriterParameter();
writerParameter.setType("JDBC");
private Config buildJdbcConfig() {
Map<String,Object> config = new HashMap<>();
config.put(DATABASE,"test");
config.put(TABLE,"test2");
config.put(TABLE,"test.test2");
config.put(URL,url);
config.put(USER,"test");
config.put(PASSWORD,"123456");
config.put(DRIVER,driver);
config.put("save_mode","append");
return new Config(config);
}
private Config buildJdbcReaderConfig() {
Config config = buildJdbcConfig();
config.put("sql","SELECT '1' as company,'1' as date,'2' as c1,'2' as c2,'2' as c3, 2 as c4");
writerParameter.setConfig(config);
return writerParameter;
return config;
}
private void createWriterTable() {

19
dolphinscheduler-data-quality/src/test/java/org/apache/dolphinscheduler/data/quality/flow/writer/WriterFactoryTest.java

@ -17,8 +17,9 @@
package org.apache.dolphinscheduler.data.quality.flow.writer;
import org.apache.dolphinscheduler.data.quality.configuration.WriterParameter;
import org.apache.dolphinscheduler.data.quality.context.DataQualityContext;
import org.apache.dolphinscheduler.data.quality.config.WriterConfig;
import org.apache.dolphinscheduler.data.quality.flow.batch.BatchWriter;
import org.apache.dolphinscheduler.data.quality.flow.batch.writer.WriterFactory;
import java.util.ArrayList;
import java.util.List;
@ -34,17 +35,15 @@ public class WriterFactoryTest {
@Test
public void testWriterGenerate() {
DataQualityContext context = new DataQualityContext();
List<WriterParameter> writerParameters = new ArrayList<>();
WriterParameter writerParameter = new WriterParameter();
writerParameter.setType("JDBC");
writerParameter.setConfig(null);
writerParameters.add(writerParameter);
context.setWriterParamList(writerParameters);
List<WriterConfig> writerConfigs = new ArrayList<>();
WriterConfig writerConfig = new WriterConfig();
writerConfig.setType("JDBC");
writerConfig.setConfig(null);
writerConfigs.add(writerConfig);
int flag = 0;
try {
List<IWriter> writers = WriterFactory.getInstance().getWriters(context);
List<BatchWriter> writers = WriterFactory.getInstance().getWriters(null,writerConfigs);
if (writers != null && writers.size() >= 1) {
flag = 1;
}

128
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/entity/DataQualityTaskExecutionContext.java

@ -31,11 +31,15 @@ import java.util.List;
public class DataQualityTaskExecutionContext implements Serializable {
/**
* ruleName
* rule id
*/
private int ruleId;
/**
* rule name
*/
private String ruleName;
/**
* ruleType
* rule type
*/
private RuleType ruleType;
/**
@ -46,56 +50,82 @@ public class DataQualityTaskExecutionContext implements Serializable {
* execute sql list
*/
private List<DqRuleExecuteSql> executeSqlList;
/**
* sourceConnectorType
* if comparison value calculate from statistics value table
*/
private boolean comparisonNeedStatisticsValueTable = false;
/**
* compare with fixed value
*/
private boolean compareWithFixedValue = false;
/**
* error output path
*/
private String hdfsPath;
/**
* sourceConnector type
*/
private String sourceConnectorType;
/**
* sourceType
* source type
*/
private int sourceType;
/**
* sourceConnectionParams
* source connection params
*/
private String sourceConnectionParams;
/**
* targetConnectorType
* target connector type
*/
private String targetConnectorType;
/**
* targetType
* target type
*/
private int targetType;
/**
* targetConnectionParams
* target connection params
*/
private String targetConnectionParams;
/**
* sourceConnectorType
* source connector type
*/
private String writerConnectorType;
/**
* writerType
* writer type
*/
private int writerType;
/**
* writer table
*/
private String writerTable;
/**
* writerConnectionParams
* writer connection params
*/
private String writerConnectionParams;
/**
* statistics value connector type
*/
private String statisticsValueConnectorType;
/**
* statistics value type
*/
private int statisticsValueType;
/**
* statistics value table
*/
private String statisticsValueTable;
/**
* statistics value writer connection params
*/
private String statisticsValueWriterConnectionParams;
public int getRuleId() {
return ruleId;
}
public void setRuleId(int ruleId) {
this.ruleId = ruleId;
}
public String getSourceConnectorType() {
return sourceConnectorType;
@ -177,6 +207,38 @@ public class DataQualityTaskExecutionContext implements Serializable {
this.writerConnectorType = writerConnectorType;
}
public String getStatisticsValueConnectorType() {
return statisticsValueConnectorType;
}
public void setStatisticsValueConnectorType(String statisticsValueConnectorType) {
this.statisticsValueConnectorType = statisticsValueConnectorType;
}
public int getStatisticsValueType() {
return statisticsValueType;
}
public void setStatisticsValueType(int statisticsValueType) {
this.statisticsValueType = statisticsValueType;
}
public String getStatisticsValueTable() {
return statisticsValueTable;
}
public void setStatisticsValueTable(String statisticsValueTable) {
this.statisticsValueTable = statisticsValueTable;
}
public String getStatisticsValueWriterConnectionParams() {
return statisticsValueWriterConnectionParams;
}
public void setStatisticsValueWriterConnectionParams(String statisticsValueWriterConnectionParams) {
this.statisticsValueWriterConnectionParams = statisticsValueWriterConnectionParams;
}
public String getRuleName() {
return ruleName;
}
@ -222,4 +284,28 @@ public class DataQualityTaskExecutionContext implements Serializable {
}
this.executeSqlList.add(executeSqlDefinition);
}
public boolean isComparisonNeedStatisticsValueTable() {
return comparisonNeedStatisticsValueTable;
}
public void setComparisonNeedStatisticsValueTable(boolean comparisonNeedStatisticsValueTable) {
this.comparisonNeedStatisticsValueTable = comparisonNeedStatisticsValueTable;
}
public boolean isCompareWithFixedValue() {
return compareWithFixedValue;
}
public void setCompareWithFixedValue(boolean compareWithFixedValue) {
this.compareWithFixedValue = compareWithFixedValue;
}
public String getHdfsPath() {
return hdfsPath;
}
public void setHdfsPath(String hdfsPath) {
this.hdfsPath = hdfsPath;
}
}

20
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java

@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master;
import org.apache.dolphinscheduler.common.Constants;
@ -31,6 +32,9 @@ import org.apache.dolphinscheduler.server.worker.WorkerServer;
import org.apache.dolphinscheduler.server.zk.ZKMasterClient;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.apache.dolphinscheduler.service.quartz.QuartzExecutors;
import javax.annotation.PostConstruct;
import org.quartz.SchedulerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -40,11 +44,6 @@ import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.FilterType;
import javax.annotation.PostConstruct;
@ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = {
@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class})
})
@ -106,7 +105,7 @@ public class MasterServer {
* run master server
*/
@PostConstruct
public void run(){
public void run() {
//init remoting server
NettyServerConfig serverConfig = new NettyServerConfig();
@ -160,7 +159,7 @@ public class MasterServer {
try {
//execute only once
if(Stopper.isStopped()){
if (Stopper.isStopped()) {
return;
}
@ -172,7 +171,7 @@ public class MasterServer {
try {
//thread sleep 3 seconds for thread quietly stop
Thread.sleep(3000L);
}catch (Exception e){
} catch (Exception e) {
logger.warn("thread sleep exception ", e);
}
//
@ -180,11 +179,12 @@ public class MasterServer {
this.nettyRemotingServer.close();
this.masterRegistry.unRegistry();
this.zkMasterClient.close();
//close quartz
try{
try {
QuartzExecutors.getInstance().shutdown();
logger.info("Quartz service stopped");
}catch (Exception e){
} catch (Exception e) {
logger.warn("Quartz service stopped exception:{}",e.getMessage());
}
} catch (Exception e) {

127
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumer.java

@ -17,6 +17,15 @@
package org.apache.dolphinscheduler.server.master.consumer;
import static org.apache.dolphinscheduler.common.Constants.ADDRESS;
import static org.apache.dolphinscheduler.common.Constants.COMPARISON_NAME;
import static org.apache.dolphinscheduler.common.Constants.COMPARISON_TABLE;
import static org.apache.dolphinscheduler.common.Constants.COMPARISON_TYPE;
import static org.apache.dolphinscheduler.common.Constants.DATABASE;
import static org.apache.dolphinscheduler.common.Constants.OTHER;
import static org.apache.dolphinscheduler.common.Constants.PASSWORD;
import static org.apache.dolphinscheduler.common.Constants.USER;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.DbType;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
@ -25,6 +34,8 @@ import org.apache.dolphinscheduler.common.enums.SqoopJobType;
import org.apache.dolphinscheduler.common.enums.TaskType;
import org.apache.dolphinscheduler.common.enums.UdfType;
import org.apache.dolphinscheduler.common.enums.dq.ConnectorType;
import org.apache.dolphinscheduler.common.enums.dq.ExecuteSqlType;
import org.apache.dolphinscheduler.common.model.JdbcInfo;
import org.apache.dolphinscheduler.common.model.TaskNode;
import org.apache.dolphinscheduler.common.process.ResourceInfo;
import org.apache.dolphinscheduler.common.task.AbstractParameters;
@ -40,10 +51,14 @@ import org.apache.dolphinscheduler.common.utils.CollectionUtils;
import org.apache.dolphinscheduler.common.utils.EnumUtils;
import org.apache.dolphinscheduler.common.utils.FileUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.apache.dolphinscheduler.common.utils.TaskParametersUtils;
import org.apache.dolphinscheduler.dao.datasource.SpringConnectionFactory;
import org.apache.dolphinscheduler.dao.entity.DataSource;
import org.apache.dolphinscheduler.dao.entity.DqComparisonType;
import org.apache.dolphinscheduler.dao.entity.DqRule;
import org.apache.dolphinscheduler.dao.entity.DqRuleExecuteSql;
import org.apache.dolphinscheduler.dao.entity.DqRuleInputEntry;
import org.apache.dolphinscheduler.dao.entity.Resource;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
@ -61,6 +76,7 @@ import org.apache.dolphinscheduler.server.master.dispatch.ExecutorDispatcher;
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext;
import org.apache.dolphinscheduler.server.master.dispatch.enums.ExecutorType;
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.ExecuteException;
import org.apache.dolphinscheduler.server.utils.JdbcUrlParser;
import org.apache.dolphinscheduler.service.process.ProcessService;
import org.apache.dolphinscheduler.service.queue.TaskPriority;
import org.apache.dolphinscheduler.service.queue.TaskPriorityQueue;
@ -69,6 +85,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@ -81,6 +98,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import com.alibaba.druid.pool.DruidDataSource;
/**
* TaskUpdateQueue consumer
*/
@ -110,6 +129,8 @@ public class TaskPriorityQueueConsumer extends Thread {
@Autowired
private ExecutorDispatcher dispatcher;
@Autowired
private SpringConnectionFactory springConnectionFactory;
/**
* master config
@ -220,8 +241,7 @@ public class TaskPriorityQueueConsumer extends Thread {
taskInstance.getStartTime(),
taskInstance.getHost(),
null,
null,
taskInstance.getId());
null);
return null;
}
// set queue for process instance, user-specified queue takes precedence over tenant queue
@ -257,7 +277,7 @@ public class TaskPriorityQueueConsumer extends Thread {
}
if (taskType == TaskType.DATA_QUALITY) {
setDataQualityTaskRelation(dataQualityTaskExecutionContext, taskNode);
setDataQualityTaskRelation(dataQualityTaskExecutionContext, taskNode,tenant.getTenantCode());
}
return TaskExecutionContextBuilder.get()
@ -347,9 +367,8 @@ public class TaskPriorityQueueConsumer extends Thread {
* @param dataQualityTaskExecutionContext dataQualityTaskExecutionContext
* @param taskNode taskNode
*/
private void setDataQualityTaskRelation(DataQualityTaskExecutionContext dataQualityTaskExecutionContext, TaskNode taskNode) {
private void setDataQualityTaskRelation(DataQualityTaskExecutionContext dataQualityTaskExecutionContext, TaskNode taskNode,String tenantCode) {
DataQualityParameters dataQualityParameters = JSONUtils.parseObject(taskNode.getParams(), DataQualityParameters.class);
if (dataQualityParameters == null) {
return;
}
@ -358,12 +377,12 @@ public class TaskPriorityQueueConsumer extends Thread {
int ruleId = dataQualityParameters.getRuleId();
DqRule dqRule = processService.getDqRule(ruleId);
if (dqRule == null) {
logger.error("can not get DqRule by id {}",ruleId);
return;
}
dataQualityTaskExecutionContext.setRuleId(ruleId);
dataQualityTaskExecutionContext.setRuleType(dqRule.getType());
dataQualityTaskExecutionContext.setRuleName(dqRule.getName());
@ -372,29 +391,99 @@ public class TaskPriorityQueueConsumer extends Thread {
logger.error("{} rule input entry list is empty ",ruleId);
return;
}
List<DqRuleExecuteSql> executeSqlList = processService.getDqExecuteSql(ruleId);
setComparisonParams(dataQualityTaskExecutionContext, config, ruleInputEntryList, executeSqlList);
dataQualityTaskExecutionContext.setRuleInputEntryList(ruleInputEntryList);
dataQualityTaskExecutionContext.setExecuteSqlList(processService.getDqExecuteSql(ruleId));
dataQualityTaskExecutionContext.setExecuteSqlList(executeSqlList);
dataQualityTaskExecutionContext.setHdfsPath(
PropertyUtils.getString(Constants.FS_DEFAULTFS)
+ PropertyUtils.getString(Constants.DATA_QUALITY_ERROR_OUTPUT_PATH, "/user/" + tenantCode + "/data_quality_error_data"));
setSourceConfig(dataQualityTaskExecutionContext, config);
setTargetConfig(dataQualityTaskExecutionContext, config);
setWriterConfig(dataQualityTaskExecutionContext, config);
setWriterConfig(dataQualityTaskExecutionContext);
setStatisticsValueWriterConfig(dataQualityTaskExecutionContext);
}
private void setWriterConfig(DataQualityTaskExecutionContext dataQualityTaskExecutionContext, Map<String, String> config) {
if (StringUtils.isNotEmpty(config.get(Constants.WRITER_DATASOURCE_ID))) {
DataSource dataSource = processService.findDataSourceById(Integer.parseInt(config.get(Constants.WRITER_DATASOURCE_ID)));
if (dataSource != null) {
ConnectorType writerConnectorType = ConnectorType.of(
DbType.of(Integer.parseInt(config.get(Constants.WRITER_CONNECTOR_TYPE))).isHive() ? 1 : 0);
dataQualityTaskExecutionContext.setWriterConnectorType(writerConnectorType.getDescription());
dataQualityTaskExecutionContext.setWriterType(dataSource.getType().getCode());
dataQualityTaskExecutionContext.setWriterConnectionParams(dataSource.getConnectionParams());
dataQualityTaskExecutionContext.setWriterTable("t_ds_dq_execute_result");
/**
 * Resolve the comparison-value configuration of a data-quality rule and attach it
 * to the task execution context.
 *
 * If the task config carries a COMPARISON_TYPE id greater than 1, the matching
 * {@code DqComparisonType} is loaded and its name/output-table are appended to the
 * rule input entries; its SQL is inserted at the head of the execute-SQL list so it
 * runs before the rule's own statements. A comparison-type id of exactly 1 means
 * "compare against a fixed value" and only flips a flag on the context.
 *
 * @param dataQualityTaskExecutionContext context being assembled for the DQ task
 * @param config                          raw rule parameter map from the task node
 * @param ruleInputEntryList              rule input entries; mutated in place
 * @param executeSqlList                  execute-SQL definitions; mutated in place
 */
private void setComparisonParams(DataQualityTaskExecutionContext dataQualityTaskExecutionContext,
                                 Map<String, String> config,
                                 List<DqRuleInputEntry> ruleInputEntryList,
                                 List<DqRuleExecuteSql> executeSqlList) {
    if (config.get(COMPARISON_TYPE) != null) {
        int comparisonTypeId = Integer.parseInt(config.get(COMPARISON_TYPE));
        // comparison type id 1 is "fixed value": no comparison params need to be set
        if (comparisonTypeId > 1) {
            DqComparisonType type = processService.getComparisonTypeById(comparisonTypeId);
            if (type != null) {
                // expose the comparison name to the rule as an extra input entry
                DqRuleInputEntry comparisonName = new DqRuleInputEntry();
                comparisonName.setField(COMPARISON_NAME);
                comparisonName.setValue(type.getName());
                ruleInputEntryList.add(comparisonName);
                // expose the comparison output table as an extra input entry
                DqRuleInputEntry comparisonTable = new DqRuleInputEntry();
                comparisonTable.setField(COMPARISON_TABLE);
                comparisonTable.setValue(type.getOutputTable());
                ruleInputEntryList.add(comparisonTable);
                // NOTE(review): this reassignment only rebinds the local parameter —
                // a caller that passed null will never see the list created here.
                // Verify that callers always pass a non-null executeSqlList.
                if (executeSqlList == null) {
                    executeSqlList = new ArrayList<>();
                }
                // the comparison SQL must run before the rule's own statements,
                // hence add(0, ...) rather than append
                DqRuleExecuteSql dqRuleExecuteSql = new DqRuleExecuteSql();
                dqRuleExecuteSql.setType(ExecuteSqlType.MIDDLE);
                dqRuleExecuteSql.setIndex(1);
                dqRuleExecuteSql.setSql(type.getExecuteSql());
                dqRuleExecuteSql.setTableAlias(type.getOutputTable());
                executeSqlList.add(0,dqRuleExecuteSql);
                // inner-source comparison types read from the statistics value table
                if (Boolean.TRUE.equals(type.getInnerSource())) {
                    dataQualityTaskExecutionContext.setComparisonNeedStatisticsValueTable(true);
                }
            }
        } else if (comparisonTypeId == 1) {
            dataQualityTaskExecutionContext.setCompareWithFixedValue(true);
        }
    }
}
/**
 * Build a {@link DataSource} describing DolphinScheduler's own database,
 * derived from the Druid connection pool managed by Spring.
 *
 * @return a DataSource whose type and connection params mirror the pool's JDBC
 *         URL; when the URL cannot be parsed, only the user name is populated
 */
public DataSource getDefaultDataSource() {
    DruidDataSource pool = springConnectionFactory.dataSource();
    DataSource defaultSource = new DataSource();
    defaultSource.setUserName(pool.getUsername());

    JdbcInfo jdbcInfo = JdbcUrlParser.getJdbcInfo(pool.getUrl());
    if (jdbcInfo == null) {
        // URL not parseable: return the partially-filled data source unchanged
        return defaultSource;
    }

    // serialize the pool's connection settings as the standard params JSON
    Properties connProps = new Properties();
    connProps.setProperty(USER, pool.getUsername());
    connProps.setProperty(PASSWORD, pool.getPassword());
    connProps.setProperty(DATABASE, jdbcInfo.getDatabase());
    connProps.setProperty(ADDRESS, jdbcInfo.getAddress());
    connProps.setProperty(OTHER, jdbcInfo.getParams());

    defaultSource.setType(JdbcUrlParser.getDbType(jdbcInfo.getDriverName()));
    defaultSource.setConnectionParams(JSONUtils.toJsonString(connProps));
    return defaultSource;
}
/**
 * Point the statistics-value writer at DolphinScheduler's own database,
 * targeting the t_ds_dq_task_statistics_value table.
 *
 * @param dataQualityTaskExecutionContext context being assembled for the DQ task
 */
private void setStatisticsValueWriterConfig(DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
    DataSource defaultSource = getDefaultDataSource();
    // connector id 1 is the hive-style connector, 0 the generic JDBC one
    boolean hiveLike = defaultSource.getType().isHive();
    ConnectorType connectorType = ConnectorType.of(hiveLike ? 1 : 0);
    dataQualityTaskExecutionContext.setStatisticsValueConnectorType(connectorType.getDescription());
    dataQualityTaskExecutionContext.setStatisticsValueType(defaultSource.getType().getCode());
    dataQualityTaskExecutionContext.setStatisticsValueWriterConnectionParams(defaultSource.getConnectionParams());
    dataQualityTaskExecutionContext.setStatisticsValueTable("t_ds_dq_task_statistics_value");
}
/**
 * Point the result writer at DolphinScheduler's own database,
 * targeting the t_ds_dq_execute_result table.
 *
 * @param dataQualityTaskExecutionContext context being assembled for the DQ task
 */
private void setWriterConfig(DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
    DataSource defaultSource = getDefaultDataSource();
    // connector id 1 is the hive-style connector, 0 the generic JDBC one
    boolean hiveLike = defaultSource.getType().isHive();
    ConnectorType connectorType = ConnectorType.of(hiveLike ? 1 : 0);
    dataQualityTaskExecutionContext.setWriterConnectorType(connectorType.getDescription());
    dataQualityTaskExecutionContext.setWriterType(defaultSource.getType().getCode());
    dataQualityTaskExecutionContext.setWriterConnectionParams(defaultSource.getConnectionParams());
    dataQualityTaskExecutionContext.setWriterTable("t_ds_dq_execute_result");
}
private void setTargetConfig(DataQualityTaskExecutionContext dataQualityTaskExecutionContext, Map<String, String> config) {
if (StringUtils.isNotEmpty(config.get(Constants.TARGET_DATASOURCE_ID))) {
DataSource dataSource = processService.findDataSourceById(Integer.parseInt(config.get(Constants.TARGET_DATASOURCE_ID)));

122
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskResponseService.java

@ -19,20 +19,13 @@ package org.apache.dolphinscheduler.server.master.processor.queue;
import org.apache.dolphinscheduler.common.enums.Event;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.TaskType;
import org.apache.dolphinscheduler.common.enums.dq.CheckType;
import org.apache.dolphinscheduler.common.enums.dq.DqFailureStrategy;
import org.apache.dolphinscheduler.common.enums.dq.DqTaskState;
import org.apache.dolphinscheduler.common.enums.dq.OperatorType;
import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.dao.entity.DqExecuteResult;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.remote.command.DBTaskAckCommand;
import org.apache.dolphinscheduler.remote.command.DBTaskResponseCommand;
import org.apache.dolphinscheduler.server.utils.AlertManager;
import org.apache.dolphinscheduler.server.utils.DataQualityResultOperator;
import org.apache.dolphinscheduler.service.process.ProcessService;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
@ -71,14 +64,15 @@ public class TaskResponseService {
private ProcessService processService;
/**
* task response worker
* data quality result operator
*/
private Thread taskResponseWorker;
@Autowired
private DataQualityResultOperator dataQualityResultOperator;
/**
* alert manager
* task response worker
*/
private AlertManager alertManager = new AlertManager();
private Thread taskResponseWorker;
@PostConstruct
public void start() {
@ -156,8 +150,7 @@ public class TaskResponseService {
taskResponseEvent.getStartTime(),
taskResponseEvent.getWorkerAddress(),
taskResponseEvent.getExecutePath(),
taskResponseEvent.getLogPath(),
taskResponseEvent.getTaskInstanceId());
taskResponseEvent.getLogPath());
}
// if taskInstance is null (maybe deleted) . retry will be meaningless . so ack success
DBTaskAckCommand taskAckCommand = new DBTaskAckCommand(ExecutionStatus.SUCCESS.getCode(), taskResponseEvent.getTaskInstanceId());
@ -173,13 +166,12 @@ public class TaskResponseService {
TaskInstance taskInstance = processService.findTaskInstanceById(taskResponseEvent.getTaskInstanceId());
if (taskInstance != null) {
operateDqExecuteResult(taskResponseEvent, taskInstance);
dataQualityResultOperator.operateDqExecuteResult(taskResponseEvent, taskInstance);
processService.changeTaskState(taskInstance, taskResponseEvent.getState(),
taskResponseEvent.getEndTime(),
taskResponseEvent.getProcessId(),
taskResponseEvent.getAppIds(),
taskResponseEvent.getTaskInstanceId(),
taskResponseEvent.getVarPool()
);
}
@ -197,103 +189,7 @@ public class TaskResponseService {
}
}
private void operateDqExecuteResult(TaskResponseEvent taskResponseEvent, TaskInstance taskInstance) {
if (TaskType.DATA_QUALITY == TaskType.valueOf(taskInstance.getTaskType())) {
processService.updateDqExecuteResultUserId(taskResponseEvent.getTaskInstanceId());
DqExecuteResult dqExecuteResult =
processService.getDqExecuteResultByTaskInstanceId(taskResponseEvent.getTaskInstanceId());
if (dqExecuteResult != null) {
//check the result ,if result is failure do some operator by failure strategy
checkDqExecuteResult(taskResponseEvent, dqExecuteResult);
}
}
}
private void checkDqExecuteResult(TaskResponseEvent taskResponseEvent, DqExecuteResult dqExecuteResult) {
if (isFailure(dqExecuteResult)) {
DqFailureStrategy dqFailureStrategy = DqFailureStrategy.of(dqExecuteResult.getFailureStrategy());
if (dqFailureStrategy != null) {
dqExecuteResult.setState(DqTaskState.FAILURE);
switch (dqFailureStrategy) {
case END:
taskResponseEvent.setState(ExecutionStatus.FAILURE);
logger.info("task is failre and end");
break;
case CONTINUE:
logger.info("task is failre and continue");
break;
case END_ALTER:
taskResponseEvent.setState(ExecutionStatus.FAILURE);
sendAlert(dqExecuteResult);
logger.info("task is failre, end and alert");
break;
case CONTINUE_ALTER:
sendAlert(dqExecuteResult);
logger.info("task is failre, continue and alert");
break;
default:
break;
}
}
} else {
dqExecuteResult.setState(DqTaskState.SUCCESS);
}
processService.updateDqExecuteResultState(dqExecuteResult);
}
private boolean isFailure(DqExecuteResult dqExecuteResult) {
CheckType checkType = dqExecuteResult.getCheckType();
double statisticsValue = dqExecuteResult.getStatisticsValue();
double comparisonValue = dqExecuteResult.getComparisonValue();
double threshold = dqExecuteResult.getThreshold();
OperatorType operatorType = OperatorType.of(dqExecuteResult.getOperator());
boolean isFailure = false;
if (operatorType != null) {
if (CheckType.STATISTICS_COMPARE_FIXED_VALUE == checkType) {
isFailure = getCompareResult(operatorType,statisticsValue,threshold);
} else if (CheckType.STATISTICS_COMPARE_COMPARISON == checkType) {
isFailure = getCompareResult(operatorType,comparisonValue - statisticsValue,threshold);
} else if (CheckType.STATISTICS_COMPARISON_PERCENTAGE == checkType) {
isFailure = getCompareResult(operatorType,statisticsValue / comparisonValue * 100,threshold);
}
}
return isFailure;
}
private void sendAlert(DqExecuteResult dqExecuteResult) {
alertManager.
sendAlterDataQualityTask(dqExecuteResult,
processService.findProcessInstanceDetailById(
Integer.parseInt(String.valueOf(dqExecuteResult.getProcessInstanceId()))));
}
private static boolean getCompareResult(OperatorType operatorType, double srcValue, double targetValue) {
BigDecimal src = BigDecimal.valueOf(srcValue);
BigDecimal target = BigDecimal.valueOf(targetValue);
switch (operatorType) {
case EQ:
return src.compareTo(target) == 0;
case LT:
return src.compareTo(target) <= -1;
case LE:
return src.compareTo(target) == 0 || src.compareTo(target) <= -1;
case GT:
return src.compareTo(target) >= 1;
case GE:
return src.compareTo(target) == 0 || src.compareTo(target) >= 1;
case NE:
return src.compareTo(target) != 0;
default:
return true;
}
}
public BlockingQueue<TaskResponseEvent> getEventQueue() {
return eventQueue;
}
}
}

47
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/AlertManager.java

@ -30,6 +30,7 @@ import org.apache.dolphinscheduler.dao.entity.ProcessAlertContent;
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
import org.apache.dolphinscheduler.dao.entity.ProjectUser;
import org.apache.dolphinscheduler.dao.entity.TaskAlertContent;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import java.util.ArrayList;
@ -254,13 +255,12 @@ public class AlertManager {
/**
* send data quality task alert
* if result is null ,send task failure message
*/
public void sendAlterDataQualityTask(DqExecuteResult result,ProcessInstance processInstance) {
public void sendDataQualityTaskExecuteResultAlert(DqExecuteResult result, ProcessInstance processInstance) {
Alert alert = new Alert();
String ruleName = result.getRuleName();
String state = result.getState().getDescription();
alert.setTitle(ruleName + " " + state);
alert.setTitle("DataQualityResult [" + result.getTaskName() + "] " + state);
String content = getDataQualityAlterContent(result);
alert.setContent(content);
alert.setAlertGroupId(processInstance.getWarningGroupId());
@ -269,6 +269,20 @@ public class AlertManager {
logger.info("add alert to db , alert: {}", alert);
}
/**
* send data quality task error alert
*/
/**
 * Raise a failure-warning alert for a task instance, addressed to the
 * warning group of its process instance.
 *
 * @param taskInstance    the failed task
 * @param processInstance the owning process instance (supplies the alert group)
 */
public void sendTaskErrorAlert(TaskInstance taskInstance, ProcessInstance processInstance) {
    Alert alert = new Alert();
    alert.setTitle("Task [" + taskInstance.getName() + "] Failure Warning");
    alert.setContent(getTaskAlterContent(taskInstance));
    alert.setAlertGroupId(processInstance.getWarningGroupId());
    alert.setCreateTime(new Date());
    alertDao.addAlert(alert);
    logger.info("add alert to db , alert: {}", alert);
}
/**
* getDataQualityAlterContent
* @param result DqExecuteResult
@ -294,6 +308,31 @@ public class AlertManager {
.userId(result.getUserId())
.userName(result.getUserName())
.state(result.getState())
.errorDataPath(result.getErrorOutputPath())
.build();
return JSONUtils.toJsonString(content);
}
/**
* getTaskAlterContent
* @param taskInstance TaskInstance
* @return String String
*/
public String getTaskAlterContent(TaskInstance taskInstance) {
TaskAlertContent content = TaskAlertContent.newBuilder()
.processDefinitionId(taskInstance.getProcessDefinitionId())
.processInstanceName(taskInstance.getProcessInstanceName())
.processInstanceId(taskInstance.getProcessInstanceId())
.taskInstanceId(taskInstance.getId())
.taskName(taskInstance.getName())
.taskType(taskInstance.getTaskType())
.state(taskInstance.getState())
.startTime(taskInstance.getStartTime())
.endTime(taskInstance.getEndTime())
.host(taskInstance.getHost())
.logPath(taskInstance.getLogPath())
.build();
return JSONUtils.toJsonString(content);

182
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/DataQualityResultOperator.java

@ -0,0 +1,182 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.utils;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.TaskType;
import org.apache.dolphinscheduler.common.enums.dq.CheckType;
import org.apache.dolphinscheduler.common.enums.dq.DqFailureStrategy;
import org.apache.dolphinscheduler.common.enums.dq.DqTaskState;
import org.apache.dolphinscheduler.common.enums.dq.OperatorType;
import org.apache.dolphinscheduler.dao.entity.DqExecuteResult;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskResponseEvent;
import org.apache.dolphinscheduler.service.process.ProcessService;
import java.math.BigDecimal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
* DataQualityResultOperator
*/
@Component
public class DataQualityResultOperator {
/**
* logger
*/
private final Logger logger = LoggerFactory.getLogger(DataQualityResultOperator.class);
/**
* process service
*/
@Autowired
private ProcessService processService;
/**
* alert manager
*/
private final AlertManager alertManager = new AlertManager();
public void operateDqExecuteResult(TaskResponseEvent taskResponseEvent, TaskInstance taskInstance) {
if (TaskType.DATA_QUALITY == TaskType.valueOf(taskInstance.getTaskType())) {
ProcessInstance processInstance =
processService.findProcessInstanceDetailById(
Integer.parseInt(String.valueOf(taskInstance.getProcessInstanceId())));
if (taskResponseEvent.getState().typeIsFailure()
|| taskResponseEvent.getState().typeIsCancel()) {
processService.deleteDqExecuteResultByTaskInstanceId(taskInstance.getId());
processService.deleteTaskStatisticsValueByTaskInstanceId(taskInstance.getId());
sendDqTaskErrorAlert(taskInstance,processInstance);
return;
}
processService.updateDqExecuteResultUserId(taskInstance.getId());
DqExecuteResult dqExecuteResult =
processService.getDqExecuteResultByTaskInstanceId(taskInstance.getId());
if (dqExecuteResult != null) {
//check the result ,if result is failure do some operator by failure strategy
checkDqExecuteResult(taskResponseEvent, dqExecuteResult, processInstance);
}
}
}
private void checkDqExecuteResult(TaskResponseEvent taskResponseEvent,
DqExecuteResult dqExecuteResult,
ProcessInstance processInstance) {
if (isFailure(dqExecuteResult)) {
DqFailureStrategy dqFailureStrategy = DqFailureStrategy.of(dqExecuteResult.getFailureStrategy());
if (dqFailureStrategy != null) {
dqExecuteResult.setState(DqTaskState.FAILURE);
sendDqTaskResultAlert(dqExecuteResult,processInstance);
switch (dqFailureStrategy) {
case ALERT:
logger.info("task is failure, continue and alert");
break;
case BLOCK:
taskResponseEvent.setState(ExecutionStatus.FAILURE);
logger.info("task is failure, end and alert");
break;
default:
break;
}
}
} else {
dqExecuteResult.setState(DqTaskState.SUCCESS);
}
processService.updateDqExecuteResultState(dqExecuteResult);
}
private boolean isFailure(DqExecuteResult dqExecuteResult) {
CheckType checkType = dqExecuteResult.getCheckType();
double statisticsValue = dqExecuteResult.getStatisticsValue();
double comparisonValue = dqExecuteResult.getComparisonValue();
double threshold = dqExecuteResult.getThreshold();
OperatorType operatorType = OperatorType.of(dqExecuteResult.getOperator());
boolean isFailure = false;
if (operatorType != null) {
double srcValue = 0;
switch (checkType) {
case COMPARISON_MINUS_STATISTICS:
srcValue = comparisonValue - statisticsValue;
isFailure = getCompareResult(operatorType,srcValue,threshold);
break;
case STATISTICS_MINUS_COMPARISON:
srcValue = statisticsValue - comparisonValue;
isFailure = getCompareResult(operatorType,srcValue,threshold);
break;
case STATISTICS_COMPARISON_PERCENTAGE:
if (comparisonValue > 0) {
srcValue = statisticsValue / comparisonValue * 100;
}
isFailure = getCompareResult(operatorType,srcValue,threshold);
break;
case STATISTICS_COMPARISON_DIFFERENCE_COMPARISON_PERCENTAGE:
if (comparisonValue > 0) {
srcValue = Math.abs(comparisonValue - statisticsValue) / comparisonValue * 100;
}
isFailure = getCompareResult(operatorType,srcValue,threshold);
break;
default:
break;
}
}
return isFailure;
}
private void sendDqTaskResultAlert(DqExecuteResult dqExecuteResult, ProcessInstance processInstance) {
alertManager.sendDataQualityTaskExecuteResultAlert(dqExecuteResult,processInstance);
}
private void sendDqTaskErrorAlert(TaskInstance taskInstance, ProcessInstance processInstance) {
alertManager.sendTaskErrorAlert(taskInstance,processInstance);
}
/**
 * Compare srcValue against targetValue with the given operator.
 * BigDecimal is used so that the double values are compared without
 * binary floating point surprises.
 *
 * The original code tested {@code compareTo(...) <= -1} / {@code >= 1};
 * the compareTo contract only guarantees the sign of the result, so the
 * idiomatic {@code < 0} / {@code > 0} checks are used here, and the single
 * comparison result is computed once instead of up to twice per branch.
 *
 * @param operatorType comparison operator (EQ/LT/LE/GT/GE/NE)
 * @param srcValue     computed value from the check
 * @param targetValue  configured threshold
 * @return true when the comparison holds
 */
private boolean getCompareResult(OperatorType operatorType, double srcValue, double targetValue) {
    BigDecimal src = BigDecimal.valueOf(srcValue);
    BigDecimal target = BigDecimal.valueOf(targetValue);
    int result = src.compareTo(target);
    switch (operatorType) {
        case EQ:
            return result == 0;
        case LT:
            return result < 0;
        case LE:
            return result <= 0;
        case GT:
            return result > 0;
        case GE:
            return result >= 0;
        case NE:
            return result != 0;
        default:
            // unknown operator is treated as "failed check" — same as original;
            // NOTE(review): confirm this default is the intended semantics
            return true;
    }
}
}

112
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/JdbcUrlParser.java

@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.utils;
import static org.apache.dolphinscheduler.common.Constants.COLON;
import static org.apache.dolphinscheduler.common.Constants.DOUBLE_SLASH;
import static org.apache.dolphinscheduler.common.Constants.MYSQL;
import static org.apache.dolphinscheduler.common.Constants.POSTGRESQL;
import static org.apache.dolphinscheduler.common.Constants.QUESTION;
import static org.apache.dolphinscheduler.common.Constants.SEMICOLON;
import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH;
import org.apache.dolphinscheduler.common.enums.DbType;
import org.apache.dolphinscheduler.common.model.JdbcInfo;
import org.apache.dolphinscheduler.common.utils.StringUtils;
/**
* JdbcUrlParser
*/
public class JdbcUrlParser {

    // utility class: prevent instantiation
    private JdbcUrlParser() {
        throw new IllegalStateException("Utility class");
    }

    /**
     * Map a datasource type name (case-insensitive) to the matching {@link DbType}.
     *
     * @param datasourceType datasource type name, e.g. "mysql"
     * @return the matching DbType, or null when the type is not handled here
     */
    public static DbType getDbType(String datasourceType) {
        switch (datasourceType.toUpperCase()) {
            case MYSQL:
                return DbType.MYSQL;
            case POSTGRESQL:
                return DbType.POSTGRESQL;
            default:
                return null;
        }
    }

    /**
     * Parse a JDBC URL of the general form
     * {@code jdbc:<driver>://<host>[:<port>]/<database>[;<params>|?<params>]}
     * into a {@link JdbcInfo}.
     *
     * @param jdbcUrl the JDBC URL to parse
     * @return the parsed JdbcInfo, or null when the URL does not start with
     *         "jdbc:" or no database name can be extracted
     */
    public static JdbcInfo getJdbcInfo(String jdbcUrl) {
        JdbcInfo jdbcInfo = new JdbcInfo();
        int pos;
        int pos1;
        int pos2;
        String tempUri;
        // must look like "jdbc:<driver>:..."; pos1 marks the colon after the driver name
        if (jdbcUrl == null || !jdbcUrl.startsWith("jdbc:") || (pos1 = jdbcUrl.indexOf(COLON, 5)) == -1) {
            return null;
        }
        String driverName = jdbcUrl.substring(5, pos1);
        String params = "";
        String host = "";
        String database = "";
        String port = "";
        // split off the parameter section, introduced by ';' or '?'
        if (((pos2 = jdbcUrl.indexOf(SEMICOLON, pos1)) == -1) && ((pos2 = jdbcUrl.indexOf(QUESTION, pos1)) == -1)) {
            tempUri = jdbcUrl.substring(pos1 + 1);
        } else {
            tempUri = jdbcUrl.substring(pos1 + 1, pos2);
            params = jdbcUrl.substring(pos2 + 1);
        }
        // "//host[:port]/database" form vs a bare "database" token
        if (tempUri.startsWith(DOUBLE_SLASH)) {
            if ((pos = tempUri.indexOf(SINGLE_SLASH, 2)) != -1) {
                host = tempUri.substring(2, pos);
                database = tempUri.substring(pos + 1);
                // host may carry an explicit ":port"
                if ((pos = host.indexOf(COLON)) != -1) {
                    port = host.substring(pos + 1);
                    host = host.substring(0, pos);
                }
            }
        } else {
            database = tempUri;
        }
        if (StringUtils.isEmpty(database)) {
            return null;
        }
        // strip a parameter section that survived inside the database token
        if (database.contains(QUESTION)) {
            database = database.substring(0, database.indexOf(QUESTION));
        }
        if (database.contains(SEMICOLON)) {
            database = database.substring(0, database.indexOf(SEMICOLON));
        }
        jdbcInfo.setDriverName(driverName);
        jdbcInfo.setHost(host);
        jdbcInfo.setPort(port);
        jdbcInfo.setDatabase(database);
        jdbcInfo.setParams(params);
        // NOTE(review): when no port is present the address ends with a bare ':' — confirm intended
        jdbcInfo.setAddress("jdbc:" + driverName + "://" + host + COLON + port);
        return jdbcInfo;
    }
}

36
dolphinscheduler-data-quality/src/main/java/org/apache/dolphinscheduler/data/quality/utils/JdbcUtils.java → dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/Md5Utils.java

@ -15,30 +15,40 @@
* limitations under the License.
*/
package org.apache.dolphinscheduler.data.quality.utils;
package org.apache.dolphinscheduler.server.utils;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* JdbcUtil
* Md5Utils
*/
public class JdbcUtils {
public class Md5Utils {
private static final Logger logger = LoggerFactory.getLogger(JdbcUtils.class);
private static final Logger logger = LoggerFactory.getLogger(Md5Utils.class);
private JdbcUtils() {
throw new UnsupportedOperationException("Construct JdbcUtils");
private Md5Utils() {
throw new IllegalStateException("Utility class");
}
public static boolean isJdbcDriverLoaded(String driver) {
public static String getMd5(String src, boolean isUpper) {
String md5 = "";
try {
Class.forName(driver);
return true;
} catch (ClassNotFoundException e) {
logger.error("JDBC driver $driver provided is not found in class path", e);
return false;
MessageDigest md = MessageDigest.getInstance("SHA-256");
Base64.Encoder encoder = Base64.getEncoder();
md5 = encoder.encodeToString(md.digest(src.getBytes(StandardCharsets.UTF_8)));
} catch (Exception e) {
logger.error("get md5 error: {}", e.getMessage());
}
}
if (isUpper) {
md5 = md5.toUpperCase();
}
return md5;
}
}

330
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/RuleParserUtils.java

@ -18,13 +18,25 @@
package org.apache.dolphinscheduler.server.utils;
import static org.apache.dolphinscheduler.common.Constants.AND;
import static org.apache.dolphinscheduler.common.Constants.BATCH;
import static org.apache.dolphinscheduler.common.Constants.COMPARISON_TABLE;
import static org.apache.dolphinscheduler.common.Constants.DATABASE;
import static org.apache.dolphinscheduler.common.Constants.DRIVER;
import static org.apache.dolphinscheduler.common.Constants.ERROR_OUTPUT_PATH;
import static org.apache.dolphinscheduler.common.Constants.HDFS_FILE;
import static org.apache.dolphinscheduler.common.Constants.INDEX;
import static org.apache.dolphinscheduler.common.Constants.INPUT_TABLE;
import static org.apache.dolphinscheduler.common.Constants.OUTPUT_TABLE;
import static org.apache.dolphinscheduler.common.Constants.PARAMETER_BUSINESS_DATE;
import static org.apache.dolphinscheduler.common.Constants.PARAMETER_CURRENT_DATE;
import static org.apache.dolphinscheduler.common.Constants.PARAMETER_DATETIME;
import static org.apache.dolphinscheduler.common.Constants.PASSWORD;
import static org.apache.dolphinscheduler.common.Constants.PATH;
import static org.apache.dolphinscheduler.common.Constants.SQL;
import static org.apache.dolphinscheduler.common.Constants.SRC_FILTER;
import static org.apache.dolphinscheduler.common.Constants.SRC_TABLE;
import static org.apache.dolphinscheduler.common.Constants.STATISTICS_EXECUTE_SQL;
import static org.apache.dolphinscheduler.common.Constants.STATISTICS_TABLE;
import static org.apache.dolphinscheduler.common.Constants.TABLE;
import static org.apache.dolphinscheduler.common.Constants.TARGET_FILTER;
import static org.apache.dolphinscheduler.common.Constants.TARGET_TABLE;
@ -35,6 +47,7 @@ import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.DbType;
import org.apache.dolphinscheduler.common.enums.dq.ExecuteSqlType;
import org.apache.dolphinscheduler.common.exception.DolphinException;
import org.apache.dolphinscheduler.common.utils.CollectionUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.ParameterUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils;
@ -43,11 +56,12 @@ import org.apache.dolphinscheduler.dao.datasource.DataSourceFactory;
import org.apache.dolphinscheduler.dao.entity.DqRuleExecuteSql;
import org.apache.dolphinscheduler.dao.entity.DqRuleInputEntry;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ConnectorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ExecutorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.WriterParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.BaseConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.EnvConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parser.MappingColumn;
import org.apache.commons.collections.MapUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -70,37 +84,40 @@ public class RuleParserUtils {
private static final String AND_TARGET_FILTER = "AND (${target_filter})";
private static final String WHERE_TARGET_FILTER = "WHERE (${target_filter})";
public static List<ConnectorParameter> getConnectorParameterList(
public static List<BaseConfig> getReaderConfigList(
Map<String, String> inputParameterValue,
DataQualityTaskExecutionContext dataQualityTaskExecutionContext) throws DolphinException {
List<ConnectorParameter> connectorParameterList = new ArrayList<>();
List<BaseConfig> readerConfigList = new ArrayList<>();
if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getSourceConnectorType())) {
BaseDataSource baseDataSource = DataSourceFactory.getDatasource
BaseDataSource sourceDataSource = DataSourceFactory.getDatasource
(DbType.of(dataQualityTaskExecutionContext.getSourceType()),
dataQualityTaskExecutionContext.getSourceConnectionParams());
ConnectorParameter sourceConnectorParameter = new ConnectorParameter();
sourceConnectorParameter.setType(dataQualityTaskExecutionContext.getSourceConnectorType());
BaseConfig sourceBaseConfig = new BaseConfig();
sourceBaseConfig.setType(dataQualityTaskExecutionContext.getSourceConnectorType());
Map<String,Object> config = new HashMap<>();
if (baseDataSource != null) {
config.put(DATABASE,baseDataSource.getDatabase());
if (sourceDataSource != null) {
config.put(DATABASE,sourceDataSource.getDatabase());
config.put(TABLE,inputParameterValue.get(SRC_TABLE));
config.put(URL,baseDataSource.getJdbcUrl());
config.put(USER,baseDataSource.getUser());
config.put(PASSWORD,baseDataSource.getPassword());
config.put(URL,sourceDataSource.getJdbcUrl());
config.put(USER,sourceDataSource.getUser());
config.put(PASSWORD,sourceDataSource.getPassword());
config.put(DRIVER, DataSourceFactory.getDriver(DbType.of(dataQualityTaskExecutionContext.getSourceType())));
String outputTable = sourceDataSource.getDatabase() + "_" + inputParameterValue.get(SRC_TABLE);
config.put(OUTPUT_TABLE,outputTable);
inputParameterValue.put(SRC_TABLE,outputTable);
}
sourceConnectorParameter.setConfig(config);
sourceBaseConfig.setConfig(config);
connectorParameterList.add(sourceConnectorParameter);
readerConfigList.add(sourceBaseConfig);
}
if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getTargetConnectorType())) {
BaseDataSource targetDataSource = DataSourceFactory.getDatasource(DbType.of(dataQualityTaskExecutionContext.getTargetType()),
dataQualityTaskExecutionContext.getTargetConnectionParams());
ConnectorParameter targetConnectorParameter = new ConnectorParameter();
targetConnectorParameter.setType(dataQualityTaskExecutionContext.getTargetConnectorType());
BaseConfig targetBaseConfig = new BaseConfig();
targetBaseConfig.setType(dataQualityTaskExecutionContext.getTargetConnectorType());
Map<String,Object> config = new HashMap<>();
if (targetDataSource != null) {
config.put(DATABASE,targetDataSource.getDatabase());
@ -109,18 +126,21 @@ public class RuleParserUtils {
config.put(USER,targetDataSource.getUser());
config.put(PASSWORD,targetDataSource.getPassword());
config.put(DRIVER, DataSourceFactory.getDriver(DbType.of(dataQualityTaskExecutionContext.getTargetType())));
String outputTable = targetDataSource.getDatabase() + "_" + inputParameterValue.get(TARGET_TABLE);
config.put(OUTPUT_TABLE,outputTable);
inputParameterValue.put(TARGET_TABLE,outputTable);
}
targetConnectorParameter.setConfig(config);
targetBaseConfig.setConfig(config);
connectorParameterList.add(targetConnectorParameter);
readerConfigList.add(targetBaseConfig);
}
return connectorParameterList;
return readerConfigList;
}
public static int replaceExecuteSqlPlaceholder(List<DqRuleExecuteSql> executeSqlList,
int index, Map<String, String> inputParameterValueResult,
List<ExecutorParameter> executorParameterList) {
List<BaseConfig> transformerConfigList) {
List<DqRuleExecuteSql> midExecuteSqlDefinitionList
= getExecuteSqlListByType(executeSqlList, ExecuteSqlType.MIDDLE);
@ -137,39 +157,57 @@ public class RuleParserUtils {
checkAndReplace(statisticsExecuteSqlDefinitionList,inputParameterValueResult.get(TARGET_FILTER),AND_TARGET_FILTER);
checkAndReplace(statisticsExecuteSqlDefinitionList,inputParameterValueResult.get(TARGET_FILTER),WHERE_TARGET_FILTER);
if (midExecuteSqlDefinitionList != null) {
if (CollectionUtils.isNotEmpty(midExecuteSqlDefinitionList)) {
for (DqRuleExecuteSql executeSqlDefinition:midExecuteSqlDefinitionList) {
index = setExecutorParameter(
index = setTransformerConfig(
index,
inputParameterValueResult,
executorParameterList,
transformerConfigList,
executeSqlDefinition);
}
}
for (DqRuleExecuteSql executeSqlDefinition:statisticsExecuteSqlDefinitionList) {
index = setExecutorParameter(
index,
inputParameterValueResult,
executorParameterList,
executeSqlDefinition);
if (CollectionUtils.isNotEmpty(statisticsExecuteSqlDefinitionList)) {
for (DqRuleExecuteSql executeSqlDefinition:statisticsExecuteSqlDefinitionList) {
index = setTransformerConfig(
index,
inputParameterValueResult,
transformerConfigList,
executeSqlDefinition);
}
}
return index;
}
private static int setExecutorParameter(int index,
private static int setTransformerConfig(int index,
Map<String, String> inputParameterValueResult,
List<ExecutorParameter> executorParameterList,
DqRuleExecuteSql executeSqlDefinition) {
ExecutorParameter executorParameter = new ExecutorParameter();
executorParameter.setIndex(index++ + "");
executorParameter.setExecuteSql(ParameterUtils.convertParameterPlaceholders(executeSqlDefinition.getSql(),inputParameterValueResult));
executorParameter.setTableAlias(executeSqlDefinition.getTableAlias());
executorParameterList.add(executorParameter);
List<BaseConfig> transformerConfigList,
DqRuleExecuteSql executeSqlDefinition) {
Map<String,Object> config = new HashMap<>();
config.put(INDEX,index++);
config.put(SQL,ParameterUtils.convertParameterPlaceholders(executeSqlDefinition.getSql(),inputParameterValueResult));
config.put(OUTPUT_TABLE,executeSqlDefinition.getTableAlias());
BaseConfig transformerConfig = new BaseConfig(SQL,config);
transformerConfigList.add(transformerConfig);
return index;
}
public static List<BaseConfig> getSingleTableCustomSqlTransformerConfigList(int index,
Map<String, String> inputParameterValueResult) {
List<BaseConfig> list = new ArrayList<>();
Map<String,Object> config = new HashMap<>();
config.put(INDEX,index + 1);
config.put(SQL,ParameterUtils.convertParameterPlaceholders(inputParameterValueResult.get(STATISTICS_EXECUTE_SQL),inputParameterValueResult));
config.put(OUTPUT_TABLE,inputParameterValueResult.get(SRC_TABLE));
inputParameterValueResult.put(STATISTICS_TABLE,inputParameterValueResult.get(SRC_TABLE));
BaseConfig transformerConfig = new BaseConfig(SQL,config);
list.add(transformerConfig);
return list;
}
private static String getCoalesceString(String table, String column) {
return "coalesce(" + table + "." + column + ", '')";
}
@ -193,17 +231,17 @@ public class RuleParserUtils {
return defaultInputParameterValue;
}
public static List<WriterParameter> getWriterParameterList(
public static List<BaseConfig> getWriterConfigList(
String sql,
DataQualityTaskExecutionContext dataQualityTaskExecutionContext) throws DolphinException {
List<WriterParameter> writerParameterList = new ArrayList<>();
List<BaseConfig> writerConfigList = new ArrayList<>();
if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getWriterConnectorType())) {
BaseDataSource writerDataSource = DataSourceFactory.getDatasource(DbType.of(dataQualityTaskExecutionContext.getWriterType()),
dataQualityTaskExecutionContext.getWriterConnectionParams());
WriterParameter writerParameter = new WriterParameter();
writerParameter.setType(dataQualityTaskExecutionContext.getWriterConnectorType());
BaseConfig writerConfig = new BaseConfig();
writerConfig.setType(dataQualityTaskExecutionContext.getWriterConnectorType());
Map<String,Object> config = new HashMap<>();
if (writerDataSource != null) {
config.put(DATABASE,writerDataSource.getDatabase());
@ -214,13 +252,74 @@ public class RuleParserUtils {
config.put(DRIVER, DataSourceFactory.getDriver(DbType.of(dataQualityTaskExecutionContext.getWriterType())));
config.put(SQL,sql);
}
writerParameter.setConfig(config);
writerConfig.setConfig(config);
writerConfigList.add(writerConfig);
}
return writerConfigList;
}
/**
 * Append a reader config for the statistics value table when the comparison
 * needs to read previously persisted statistics values.
 *
 * @param readerConfigList                reader config list to append to (mutated in place)
 * @param dataQualityTaskExecutionContext task execution context
 */
public static void addStatisticsValueTableReaderConfig (List<BaseConfig> readerConfigList,
        DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
    if (dataQualityTaskExecutionContext.isComparisonNeedStatisticsValueTable()) {
        List<BaseConfig> statisticsBaseConfigList = RuleParserUtils.getStatisticsValueConfigReaderList(dataQualityTaskExecutionContext);
        readerConfigList.addAll(statisticsBaseConfigList);
    }
}
/**
 * Build the writer configs used to persist statistics values.
 *
 * A stray leftover diff line ({@code writerParameterList.add(writerParameter);})
 * that referenced undefined variables was removed from the mangled original.
 *
 * @param sql                             writer sql containing placeholders
 * @param inputParameterValueResult       parameter values used to resolve placeholders
 * @param dataQualityTaskExecutionContext task execution context
 * @return writer config list; empty when no statistics value connector is configured
 * @throws DolphinException when the underlying datasource cannot be built
 */
public static List<BaseConfig> getStatisticsValueConfigWriterList(
        String sql,
        Map<String, String> inputParameterValueResult,
        DataQualityTaskExecutionContext dataQualityTaskExecutionContext) throws DolphinException {
    List<BaseConfig> writerConfigList = new ArrayList<>();
    if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getStatisticsValueConnectorType())) {
        BaseConfig writerConfig = getStatisticsValueConfig(dataQualityTaskExecutionContext);
        if (writerConfig != null) {
            // resolve placeholders in the writer sql before handing it to the writer
            writerConfig.getConfig().put(SQL, ParameterUtils.convertParameterPlaceholders(sql, inputParameterValueResult));
        }
        writerConfigList.add(writerConfig);
    }
    return writerConfigList;
}
return writerParameterList;
/**
 * Build the reader configs used to load previously persisted statistics values.
 *
 * @param dataQualityTaskExecutionContext task execution context
 * @return reader config list; empty when no statistics value connector is configured
 * @throws DolphinException when the underlying datasource cannot be built
 */
public static List<BaseConfig> getStatisticsValueConfigReaderList (
        DataQualityTaskExecutionContext dataQualityTaskExecutionContext) throws DolphinException {
    List<BaseConfig> readerConfigList = new ArrayList<>();
    if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getStatisticsValueConnectorType())) {
        BaseConfig readerConfig = getStatisticsValueConfig(dataQualityTaskExecutionContext);
        if (readerConfig != null) {
            // expose the statistics value table to downstream transformers
            readerConfig.getConfig().put(OUTPUT_TABLE,dataQualityTaskExecutionContext.getStatisticsValueTable());
        }
        readerConfigList.add(readerConfig);
    }
    return readerConfigList;
}
/**
 * Build the base connector config (type + connection properties) for the
 * statistics value datasource, shared by both the reader and writer variants.
 *
 * @param dataQualityTaskExecutionContext task execution context
 * @return the base config, or null when no statistics value connector is configured
 * @throws DolphinException when the underlying datasource cannot be built
 */
public static BaseConfig getStatisticsValueConfig (
        DataQualityTaskExecutionContext dataQualityTaskExecutionContext) throws DolphinException {
    BaseConfig baseConfig = null;
    if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getStatisticsValueConnectorType())) {
        BaseDataSource writerDataSource = DataSourceFactory.getDatasource(DbType.of(dataQualityTaskExecutionContext.getStatisticsValueType()),
                dataQualityTaskExecutionContext.getStatisticsValueWriterConnectionParams());
        baseConfig = new BaseConfig();
        baseConfig.setType(dataQualityTaskExecutionContext.getStatisticsValueConnectorType());
        Map<String,Object> config = new HashMap<>();
        if (writerDataSource != null) {
            config.put(DATABASE,writerDataSource.getDatabase());
            config.put(TABLE,dataQualityTaskExecutionContext.getStatisticsValueTable());
            config.put(URL,writerDataSource.getJdbcUrl());
            config.put(USER,writerDataSource.getUser());
            config.put(PASSWORD,writerDataSource.getPassword());
            // NOTE(review): driver is resolved from getWriterType() while the datasource above
            // uses getStatisticsValueType() — looks like a copy-paste mismatch; confirm intended
            config.put(DRIVER, DataSourceFactory.getDriver(DbType.of(dataQualityTaskExecutionContext.getWriterType())));
        }
        baseConfig.setConfig(config);
    }
    return baseConfig;
}
public static String getOnClause(List<MappingColumn> mappingColumnList,Map<String,String> inputParameterValueResult) {
@ -243,39 +342,74 @@ public class RuleParserUtils {
return srcColumnNotNull + AND + targetColumnIsNull;
}
public static List<WriterParameter> getWriterParameterList(
public static List<BaseConfig> getWriterConfigList(
int index,
Map<String, String> inputParameterValueResult,
List<ExecutorParameter> executorParameterList,
List<BaseConfig> transformerConfigList,
DataQualityTaskExecutionContext dataQualityTaskExecutionContext,
String writerSql) throws DolphinException {
List<DqRuleExecuteSql> comparisonExecuteSqlList =
getExecuteSqlListByType(dataQualityTaskExecutionContext.getExecuteSqlList(), ExecuteSqlType.COMPARISON);
DqRuleExecuteSql comparisonSql = comparisonExecuteSqlList.get(0);
inputParameterValueResult.put(COMPARISON_TABLE,comparisonSql.getTableAlias());
if (CollectionUtils.isNotEmpty(comparisonExecuteSqlList)) {
DqRuleExecuteSql comparisonSql = comparisonExecuteSqlList.get(0);
inputParameterValueResult.put(COMPARISON_TABLE,comparisonSql.getTableAlias());
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(SRC_FILTER),AND_SRC_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(SRC_FILTER),WHERE_SRC_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(TARGET_FILTER),AND_TARGET_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(TARGET_FILTER),WHERE_TARGET_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(SRC_FILTER),AND_SRC_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(SRC_FILTER),WHERE_SRC_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(TARGET_FILTER),AND_TARGET_FILTER);
checkAndReplace(comparisonExecuteSqlList,inputParameterValueResult.get(TARGET_FILTER),WHERE_TARGET_FILTER);
for (DqRuleExecuteSql executeSqlDefinition:comparisonExecuteSqlList) {
index = setExecutorParameter(
index,
inputParameterValueResult,
executorParameterList,
executeSqlDefinition);
for (DqRuleExecuteSql executeSqlDefinition:comparisonExecuteSqlList) {
index = setTransformerConfig(
index,
inputParameterValueResult,
transformerConfigList,
executeSqlDefinition);
}
}
return getWriterParameterList(
return getWriterConfigList(
ParameterUtils.convertParameterPlaceholders(writerSql,inputParameterValueResult),
dataQualityTaskExecutionContext
);
}
/**
 * Assemble every writer config for a data quality task:
 * the result writer, the statistics-value writer, and — when an error output
 * path is configured — the error-output writer.
 *
 * @param inputParameterValue       resolved input parameter values
 * @param context                   task execution context
 * @param index                     current transformer index
 * @param transformerConfigList     transformer configs built so far (may be appended to)
 * @param writerSql                 sql template for the result writer
 * @param statisticsValueWriterSql  sql template for the statistics-value writer
 * @return the combined writer config list
 */
public static List<BaseConfig> getAllWriterConfigList (
        Map<String, String> inputParameterValue,
        DataQualityTaskExecutionContext context,
        int index,
        List<BaseConfig> transformerConfigList,
        String writerSql,
        String statisticsValueWriterSql) {
    List<BaseConfig> writerConfigList = RuleParserUtils.getWriterConfigList(
            index,
            inputParameterValue,
            transformerConfigList,
            context,
            writerSql);
    writerConfigList.addAll(
            RuleParserUtils.getStatisticsValueConfigWriterList(
                    statisticsValueWriterSql,
                    inputParameterValue,
                    context));
    // the error-output writer is optional: only added when a path + error sql exist
    BaseConfig errorOutputWriter = RuleParserUtils.getErrorOutputWriter(inputParameterValue,context);
    if (errorOutputWriter != null) {
        writerConfigList.add(errorOutputWriter);
    }
    return writerConfigList;
}
public static List<DqRuleExecuteSql> getExecuteSqlListByType(
List<DqRuleExecuteSql> allExecuteSqlList, ExecuteSqlType executeSqlType) {
if (CollectionUtils.isEmpty(allExecuteSqlList)) {
return allExecuteSqlList;
}
return allExecuteSqlList
.stream()
.filter(x -> x.getType() == executeSqlType)
@ -283,7 +417,7 @@ public class RuleParserUtils {
}
private static void checkAndReplace(List<DqRuleExecuteSql> list, String checkValue, String replaceSrc) {
if (StringUtils.isEmpty(checkValue)) {
if (StringUtils.isEmpty(checkValue) && CollectionUtils.isNotEmpty(list)) {
for (DqRuleExecuteSql executeSqlDefinition:list) {
String sql = executeSqlDefinition.getSql();
sql = sql.replace(replaceSrc,"");
@ -323,4 +457,76 @@ public class RuleParserUtils {
return list;
}
/**
 * Build the default environment config for a data quality job (batch mode).
 *
 * @return an EnvConfig whose type is {@code BATCH}
 */
public static EnvConfig getEnvConfig() {
    EnvConfig envConfig = new EnvConfig();
    envConfig.setType(BATCH);
    return envConfig;
}
/**
 * Build the HDFS writer config used to dump error rows.
 *
 * @param inputParameterValueResult       resolved input parameter values
 * @param dataQualityTaskExecutionContext task execution context
 * @return the error-output writer config, or null when no error-output sql
 *         exists or no error output path was supplied
 */
public static BaseConfig getErrorOutputWriter(Map<String, String> inputParameterValueResult,
        DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
    if (CollectionUtils.isEmpty(dataQualityTaskExecutionContext.getExecuteSqlList())) {
        return null;
    }
    // locate the first execute sql flagged as the error-output sql
    DqRuleExecuteSql errorOutputSql = null;
    for (DqRuleExecuteSql candidate : dataQualityTaskExecutionContext.getExecuteSqlList()) {
        if (candidate.isErrorOutputSql()) {
            errorOutputSql = candidate;
            break;
        }
    }
    String errorOutputPath = inputParameterValueResult.get(ERROR_OUTPUT_PATH);
    if (errorOutputSql == null || StringUtils.isEmpty(errorOutputPath)) {
        return null;
    }
    Map<String, Object> writerProps = new HashMap<>();
    writerProps.put(PATH, errorOutputPath);
    writerProps.put(INPUT_TABLE, errorOutputSql.getTableAlias());
    BaseConfig errorWriter = new BaseConfig();
    errorWriter.setType(HDFS_FILE);
    errorWriter.setConfig(writerProps);
    return errorWriter;
}
/**
 * Generate a unique code for a data quality task from its input parameters.
 * Run-specific keys (rule metadata, timestamps, check configuration, business
 * time parameters, ...) are excluded so that logically identical tasks map to
 * the same code.
 *
 * Fix: the original removed {@code "operator"} and {@code "threshold"} twice;
 * the exclusion set is now declared once as a table, which also removes the
 * duplication without changing which keys are excluded.
 *
 * @param inputParameterValue input parameter map (not modified; a copy is filtered)
 * @return digest of the remaining parameter values, or "-1" for an empty map
 */
public static String generateUniqueCode(Map<String, String> inputParameterValue) {
    if (MapUtils.isEmpty(inputParameterValue)) {
        return "-1";
    }
    Map<String, String> newInputParameterValue = new HashMap<>(inputParameterValue);
    // keys that vary per run/rule and must not affect the unique code
    String[] excludedKeys = {
        "rule_type", "rule_name", "create_time", "update_time",
        "process_definition_id", "process_instance_id", "task_instance_id",
        "check_type", "operator", "threshold", "failure_strategy",
        "data_time", "error_output_path",
        "comparison_type", "comparison_name", "comparison_table",
        PARAMETER_CURRENT_DATE, PARAMETER_BUSINESS_DATE, PARAMETER_DATETIME
    };
    for (String key : excludedKeys) {
        newInputParameterValue.remove(key);
    }
    StringBuilder sb = new StringBuilder();
    for (String value : newInputParameterValue.values()) {
        sb.append(value);
    }
    return Md5Utils.getMd5(sb.toString(), true);
}
}

70
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/DataQualityTask.java

@ -17,7 +17,13 @@
package org.apache.dolphinscheduler.server.worker.task.dq;
import org.apache.dolphinscheduler.common.Constants;
import static org.apache.dolphinscheduler.common.Constants.DATA_TIME;
import static org.apache.dolphinscheduler.common.Constants.ERROR_OUTPUT_PATH;
import static org.apache.dolphinscheduler.common.Constants.REGEXP_PATTERN;
import static org.apache.dolphinscheduler.common.Constants.SLASH;
import static org.apache.dolphinscheduler.common.Constants.UNDERLINE;
import static org.apache.dolphinscheduler.common.Constants.YYYY_MM_DD_HH_MM_SS;
import org.apache.dolphinscheduler.common.enums.CommandType;
import org.apache.dolphinscheduler.common.exception.DolphinException;
import org.apache.dolphinscheduler.common.process.Property;
@ -89,17 +95,7 @@ public class DataQualityTask extends AbstractYarnTask {
DataQualityTaskExecutionContext dataQualityTaskExecutionContext
= dqTaskExecutionContext.getDataQualityTaskExecutionContext();
DateTimeFormatter df = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
LocalDateTime time = LocalDateTime.now();
String now = df.format(time);
inputParameter.put("rule_type",dataQualityTaskExecutionContext.getRuleType().getCode() + "");
inputParameter.put("rule_name","'" + dataQualityTaskExecutionContext.getRuleName() + "'");
inputParameter.put("create_time","'" + now + "'");
inputParameter.put("update_time","'" + now + "'");
inputParameter.put("process_definition_id",dqTaskExecutionContext.getProcessDefineId() + "");
inputParameter.put("process_instance_id",dqTaskExecutionContext.getProcessInstanceId() + "");
inputParameter.put("task_instance_id",dqTaskExecutionContext.getTaskInstanceId() + "");
operateInputParameter(inputParameter, dataQualityTaskExecutionContext);
RuleManager ruleManager = new RuleManager(
inputParameter,
@ -110,8 +106,8 @@ public class DataQualityTask extends AbstractYarnTask {
dataQualityParameters
.getSparkParameters()
.setMainArgs(
"\"" + replaceDoubleBrackets(StringUtils.escapeJava(JSONUtils.toJsonString(dataQualityConfiguration))) + "\"");
.setMainArgs("\""
+ StringUtils.replaceDoubleBrackets(StringUtils.escapeJava(JSONUtils.toJsonString(dataQualityConfiguration))) + "\"");
dataQualityParameters
.getSparkParameters()
@ -120,6 +116,39 @@ public class DataQualityTask extends AbstractYarnTask {
setMainJarName();
}
/**
 * Populate the input parameter map with run-level metadata (rule id/type/name,
 * timestamps, process/task instance ids) and derived values (data time,
 * escaped regexp pattern, error output path).
 *
 * @param inputParameter                  parameter map to enrich (mutated in place)
 * @param dataQualityTaskExecutionContext data quality execution context
 */
private void operateInputParameter(Map<String, String> inputParameter, DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
    DateTimeFormatter df = DateTimeFormatter.ofPattern(YYYY_MM_DD_HH_MM_SS);
    LocalDateTime time = LocalDateTime.now();
    String now = df.format(time);
    inputParameter.put("rule_id", String.valueOf(dataQualityTaskExecutionContext.getRuleId()));
    inputParameter.put("rule_type", String.valueOf(dataQualityTaskExecutionContext.getRuleType().getCode()));
    // string-valued parameters are wrapped in single quotes for use in writer sql
    inputParameter.put("rule_name", StringUtils.wrapperSingleQuotes(dataQualityTaskExecutionContext.getRuleName()));
    inputParameter.put("create_time", StringUtils.wrapperSingleQuotes(now));
    inputParameter.put("update_time", StringUtils.wrapperSingleQuotes(now));
    inputParameter.put("process_definition_id", String.valueOf(dqTaskExecutionContext.getProcessDefineId()));
    inputParameter.put("process_instance_id", String.valueOf(dqTaskExecutionContext.getProcessInstanceId()));
    inputParameter.put("task_instance_id", String.valueOf(dqTaskExecutionContext.getTaskInstanceId()));
    // default the data time to "now" when the caller supplied none
    if (StringUtils.isEmpty(inputParameter.get(DATA_TIME))) {
        inputParameter.put(DATA_TIME,StringUtils.wrapperSingleQuotes(now));
    }
    // escaped twice — presumably so the pattern survives both JSON serialization
    // and command-line quoting of the spark main args; TODO confirm
    if (StringUtils.isNotEmpty(inputParameter.get(REGEXP_PATTERN))) {
        inputParameter.put(REGEXP_PATTERN,StringUtils.escapeJava(StringUtils.escapeJava(inputParameter.get(REGEXP_PATTERN))));
    }
    // error rows are written under <hdfsPath>/<processDefId>_<processInstId>_<taskName>
    if (StringUtils.isNotEmpty(dataQualityTaskExecutionContext.getHdfsPath())) {
        inputParameter.put(ERROR_OUTPUT_PATH,
                dataQualityTaskExecutionContext.getHdfsPath()
                + SLASH + dqTaskExecutionContext.getProcessDefineId()
                + UNDERLINE + dqTaskExecutionContext.getProcessInstanceId()
                + UNDERLINE + dqTaskExecutionContext.getTaskName());
    } else {
        inputParameter.put(ERROR_OUTPUT_PATH,"");
    }
}
@Override
protected String buildCommand() {
List<String> args = new ArrayList<>();
@ -151,7 +180,7 @@ public class DataQualityTask extends AbstractYarnTask {
@Override
protected void setMainJarName() {
ResourceInfo mainJar = new ResourceInfo();
mainJar.setRes(System.getProperty("user.dir") + File.separator + "lib" + File.separator + CommonUtils.getDqJarName());
mainJar.setRes(System.getProperty("user.dir") + File.separator + "lib" + File.separator + CommonUtils.getDataQualityJarName());
dataQualityParameters.getSparkParameters().setMainJar(mainJar);
}
@ -159,15 +188,4 @@ public class DataQualityTask extends AbstractYarnTask {
public AbstractParameters getParameters() {
return dataQualityParameters;
}
private String replaceDoubleBrackets(String mainParameter) {
mainParameter = mainParameter
.replace(Constants.DOUBLE_BRACKETS_LEFT, Constants.DOUBLE_BRACKETS_LEFT_SPACE)
.replace(Constants.DOUBLE_BRACKETS_RIGHT, Constants.DOUBLE_BRACKETS_RIGHT_SPACE);
if (mainParameter.contains(Constants.DOUBLE_BRACKETS_LEFT) || mainParameter.contains(Constants.DOUBLE_BRACKETS_RIGHT)) {
return replaceDoubleBrackets(mainParameter);
} else {
return mainParameter;
}
}
}

45
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/RuleManager.java

@ -17,8 +17,12 @@
package org.apache.dolphinscheduler.server.worker.task.dq.rule;
import static org.apache.dolphinscheduler.common.Constants.COMPARISON_TYPE;
import static org.apache.dolphinscheduler.common.Constants.UNIQUE_CODE;
import org.apache.dolphinscheduler.common.enums.CommandType;
import org.apache.dolphinscheduler.common.exception.DolphinException;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.apache.dolphinscheduler.common.utils.placeholder.BusinessTimeUtils;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.utils.RuleParserUtils;
@ -40,35 +44,48 @@ public class RuleManager {
private final Map<String, String> inputParameterValue;
private final DataQualityTaskExecutionContext dataQualityTaskExecutionContext;
private static final String NONE_COMPARISON_TYPE = "0";
private static final String BASE_SQL =
"SELECT ${rule_type} as rule_type,"
"select ${rule_type} as rule_type,"
+ "${rule_name} as rule_name,"
+ "${process_definition_id} as process_definition_id,"
+ "${process_instance_id} as process_instance_id,"
+ "${task_instance_id} as task_instance_id,"
+ "${statistics_name} AS statistics_value, "
+ "${statistics_name} AS statistics_value,"
+ "${comparison_name} AS comparison_value,"
+ "${comparison_type} AS comparison_type,"
+ "${check_type} as check_type,"
+ "${threshold} as threshold, "
+ "${operator} as operator, "
+ "${failure_strategy} as failure_strategy, "
+ "${threshold} as threshold,"
+ "${operator} as operator,"
+ "${failure_strategy} as failure_strategy,"
+ "'${error_output_path}' as error_output_path,"
+ "${create_time} as create_time,"
+ "${update_time} as update_time ";
public static final String DEFAULT_COMPARISON_WRITER_SQL =
BASE_SQL + "from ${statistics_table} FULL JOIN ${comparison_table}";
BASE_SQL + "from ${statistics_table} full join ${comparison_table}";
public static final String MULTI_TABLE_COMPARISON_WRITER_SQL =
BASE_SQL
+ "from ( ${statistics_execute_sql} ) tmp1 "
+ "join "
+ "( ${comparison_execute_sql} ) tmp2 ";
+ "join ( ${comparison_execute_sql} ) tmp2";
public static final String SINGLE_TABLE_CUSTOM_SQL_WRITER_SQL =
BASE_SQL
+ "from ( ${statistics_execute_sql} ) tmp1 "
+ "join "
+ "${comparison_table}";
+ "from ( ${statistics_table} ) tmp1 "
+ "join ${comparison_table}";
public static final String TASK_STATISTICS_VALUE_WRITER_SQL =
"select "
+ "${process_definition_id} as process_definition_id,"
+ "${task_instance_id} as task_instance_id,"
+ "${rule_id} as rule_id,"
+ "${unique_code} as unique_code,"
+ "'${statistics_name}'AS statistics_name,"
+ "${statistics_name} AS statistics_value,"
+ "${data_time} as data_time,"
+ "${create_time} as create_time,"
+ "${update_time} as update_time "
+ "from ${statistics_table}";
public RuleManager(Map<String, String> inputParameterValue, DataQualityTaskExecutionContext dataQualityTaskExecutionContext) {
this.inputParameterValue = inputParameterValue;
@ -77,15 +94,17 @@ public class RuleManager {
/**
* @return DataQualityConfiguration
* @throws Exception Exception
* @throws DolphinException DolphinException
*/
public DataQualityConfiguration generateDataQualityParameter() throws DolphinException {
Map<String, String> inputParameterValueResult =
RuleParserUtils.getInputParameterMapFromEntryList(dataQualityTaskExecutionContext.getRuleInputEntryList());
inputParameterValueResult.putAll(inputParameterValue);
inputParameterValueResult.putAll(BusinessTimeUtils.getBusinessTime(CommandType.START_PROCESS, new Date()));
inputParameterValueResult.putIfAbsent(COMPARISON_TYPE, NONE_COMPARISON_TYPE);
inputParameterValueResult.put(UNIQUE_CODE,
StringUtils.wrapperSingleQuotes(RuleParserUtils.generateUniqueCode(inputParameterValueResult)));
IRuleParser ruleParser = null;
switch (dataQualityTaskExecutionContext.getRuleType()) {

8
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/WriterParameter.java → dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/BaseConfig.java

@ -22,9 +22,9 @@ import java.util.Map;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* WriterParameter
* BaseConfig
*/
public class WriterParameter {
public class BaseConfig {
@JsonProperty("type")
private String type;
@ -32,10 +32,10 @@ public class WriterParameter {
@JsonProperty("config")
private Map<String,Object> config;
public WriterParameter() {
public BaseConfig() {
}
public WriterParameter(String type, Map<String,Object> config) {
public BaseConfig(String type, Map<String,Object> config) {
this.type = type;
this.config = config;
}

81
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/DataQualityConfiguration.java

@ -29,25 +29,40 @@ public class DataQualityConfiguration {
@JsonProperty("name")
private String name;
@JsonProperty("connectors")
private List<ConnectorParameter> connectorParameters;
@JsonProperty("env")
private EnvConfig envConfig;
@JsonProperty("writers")
private List<WriterParameter> writerParams;
@JsonProperty("readers")
private List<BaseConfig> readerConfigs;
@JsonProperty("transformers")
private List<BaseConfig> transformerConfigs;
@JsonProperty("executors")
private List<ExecutorParameter> executorParameters;
@JsonProperty("writers")
private List<BaseConfig> writerConfigs;
public DataQualityConfiguration(){}
public DataQualityConfiguration(String name,
List<ConnectorParameter> connectorParameters,
List<WriterParameter> writerParams,
List<ExecutorParameter> executorParameters) {
List<BaseConfig> readerConfigs,
List<BaseConfig> writerConfigs,
List<BaseConfig> transformerConfigs) {
this.name = name;
this.readerConfigs = readerConfigs;
this.writerConfigs = writerConfigs;
this.transformerConfigs = transformerConfigs;
}
public DataQualityConfiguration(String name,
EnvConfig envConfig,
List<BaseConfig> readerConfigs,
List<BaseConfig> writerConfigs,
List<BaseConfig> transformerConfigs) {
this.name = name;
this.connectorParameters = connectorParameters;
this.writerParams = writerParams;
this.executorParameters = executorParameters;
this.envConfig = envConfig;
this.readerConfigs = readerConfigs;
this.writerConfigs = writerConfigs;
this.transformerConfigs = transformerConfigs;
}
public String getName() {
@ -58,28 +73,46 @@ public class DataQualityConfiguration {
this.name = name;
}
public List<ConnectorParameter> getConnectorParameters() {
return connectorParameters;
public EnvConfig getEnvConfig() {
return envConfig;
}
public void setEnvConfig(EnvConfig envConfig) {
this.envConfig = envConfig;
}
public void setConnectorParameters(List<ConnectorParameter> connectorParameters) {
this.connectorParameters = connectorParameters;
public List<BaseConfig> getReaderConfigs() {
return readerConfigs;
}
public List<WriterParameter> getWriterParams() {
return writerParams;
public void setReaderConfigs(List<BaseConfig> readerConfigs) {
this.readerConfigs = readerConfigs;
}
public void setWriterParams(List<WriterParameter> writerParams) {
this.writerParams = writerParams;
public List<BaseConfig> getTransformerConfigs() {
return transformerConfigs;
}
public List<ExecutorParameter> getExecutorParameters() {
return executorParameters;
public void setTransformerConfigs(List<BaseConfig> transformerConfigs) {
this.transformerConfigs = transformerConfigs;
}
public void setExecutorParameters(List<ExecutorParameter> executorParameters) {
this.executorParameters = executorParameters;
public List<BaseConfig> getWriterConfigs() {
return writerConfigs;
}
public void setWriterConfigs(List<BaseConfig> writerConfigs) {
this.writerConfigs = writerConfigs;
}
@Override
public String toString() {
return "DataQualityConfiguration{"
+ "name='" + name + '\''
+ ", envConfig=" + envConfig
+ ", readerConfigs=" + readerConfigs
+ ", transformerConfigs=" + transformerConfigs
+ ", writerConfigs=" + writerConfigs
+ '}';
}
}

34
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/EnvConfig.java

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter;
import java.util.Map;
/**
* EnvConfig
*/
// Environment section of the data-quality configuration; a BaseConfig
// holding a type name plus a free-form key/value config map.
public class EnvConfig extends BaseConfig {

    // No-arg constructor — presumably kept for JSON deserialization
    // (BaseConfig fields carry @JsonProperty); TODO confirm against usage.
    public EnvConfig() {
    }

    // Delegates storage of type/config to BaseConfig.
    public EnvConfig(String type, Map<String,Object> config) {
        super(type,config);
    }
}

68
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parameter/ExecutorParameter.java

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* ExecutorParameter
*/
/**
 * One SQL execution step of a data-quality rule: the step index, the SQL
 * statement to run, and the alias under which its result is registered.
 * Serialized to/from JSON via the {@code @JsonProperty} mappings.
 */
public class ExecutorParameter {

    /** order of this execution step */
    @JsonProperty("index")
    private String index;

    /** SQL statement executed in this step */
    @JsonProperty("execute.sql")
    private String executeSql;

    /** alias of the table produced by the SQL */
    @JsonProperty("table.alias")
    private String tableAlias;

    /** No-arg constructor — presumably required for JSON deserialization. */
    public ExecutorParameter() {
    }

    public ExecutorParameter(String stepIndex, String sql, String alias) {
        this.index = stepIndex;
        this.executeSql = sql;
        this.tableAlias = alias;
    }

    public String getIndex() {
        return this.index;
    }

    public void setIndex(String index) {
        this.index = index;
    }

    public String getExecuteSql() {
        return this.executeSql;
    }

    public void setExecuteSql(String executeSql) {
        this.executeSql = executeSql;
    }

    public String getTableAlias() {
        return this.tableAlias;
    }

    public void setTableAlias(String tableAlias) {
        this.tableAlias = tableAlias;
    }
}

39
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/MultiTableAccuracyRuleParser.java

@ -28,10 +28,8 @@ import org.apache.dolphinscheduler.dao.entity.DqRuleExecuteSql;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.utils.RuleParserUtils;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.RuleManager;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ConnectorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.BaseConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.DataQualityConfiguration;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ExecutorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.WriterParameter;
import java.util.ArrayList;
import java.util.List;
@ -45,16 +43,19 @@ public class MultiTableAccuracyRuleParser implements IRuleParser {
@Override
public DataQualityConfiguration parse(Map<String, String> inputParameterValue,
DataQualityTaskExecutionContext context) throws DolphinException {
DqRuleExecuteSql statisticsSql =
RuleParserUtils.getExecuteSqlListByType(
context.getExecuteSqlList(), ExecuteSqlType.STATISTICS).get(0);
inputParameterValue.put(STATISTICS_TABLE,statisticsSql.getTableAlias());
int index = 1;
List<ConnectorParameter> connectorParameterList =
RuleParserUtils.getConnectorParameterList(inputParameterValue,context);
List<ExecutorParameter> executorParameterList = new ArrayList<>();
List<BaseConfig> readerConfigList =
RuleParserUtils.getReaderConfigList(inputParameterValue,context);
RuleParserUtils.addStatisticsValueTableReaderConfig(readerConfigList,context);
List<BaseConfig> transformerConfigList = new ArrayList<>();
List<MappingColumn> mappingColumnList = RuleParserUtils.getMappingColumnList(inputParameterValue.get(MAPPING_COLUMNS));
@ -65,21 +66,23 @@ public class MultiTableAccuracyRuleParser implements IRuleParser {
index = RuleParserUtils.replaceExecuteSqlPlaceholder(
context.getExecuteSqlList(),
index,
inputParameterValue,
executorParameterList);
List<WriterParameter> writerParameterList = RuleParserUtils.getWriterParameterList(
index,
inputParameterValue,
executorParameterList,
context,
RuleManager.DEFAULT_COMPARISON_WRITER_SQL);
transformerConfigList);
String writerSql = RuleManager.DEFAULT_COMPARISON_WRITER_SQL;
if (context.isCompareWithFixedValue()) {
writerSql = writerSql.replaceAll("full join \\$\\{comparison_table}","");
}
List<BaseConfig> writerConfigList = RuleParserUtils.getAllWriterConfigList(inputParameterValue,
context, index, transformerConfigList, writerSql,RuleManager.TASK_STATISTICS_VALUE_WRITER_SQL);
return new DataQualityConfiguration(
context.getRuleName(),
connectorParameterList,
writerParameterList,
executorParameterList);
RuleParserUtils.getEnvConfig(),
readerConfigList,
writerConfigList,
transformerConfigList);
}
}

26
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/MultiTableComparisonRuleParser.java

@ -17,16 +17,13 @@
package org.apache.dolphinscheduler.server.worker.task.dq.rule.parser;
import static org.apache.dolphinscheduler.server.worker.task.dq.rule.RuleManager.MULTI_TABLE_COMPARISON_WRITER_SQL;
import org.apache.dolphinscheduler.common.exception.DolphinException;
import org.apache.dolphinscheduler.common.utils.ParameterUtils;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.utils.RuleParserUtils;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ConnectorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.RuleManager;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.BaseConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.DataQualityConfiguration;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ExecutorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.WriterParameter;
import java.util.ArrayList;
import java.util.List;
@ -41,18 +38,21 @@ public class MultiTableComparisonRuleParser implements IRuleParser {
public DataQualityConfiguration parse(Map<String, String> inputParameterValue,
DataQualityTaskExecutionContext context) throws DolphinException {
List<ConnectorParameter> connectorParameterList =
RuleParserUtils.getConnectorParameterList(inputParameterValue,context);
List<ExecutorParameter> executorParameterList = new ArrayList<>();
List<BaseConfig> readerConfigList =
RuleParserUtils.getReaderConfigList(inputParameterValue,context);
RuleParserUtils.addStatisticsValueTableReaderConfig(readerConfigList,context);
List<BaseConfig> transformerConfigList = new ArrayList<>();
List<WriterParameter> writerParameterList = RuleParserUtils.getWriterParameterList(
ParameterUtils.convertParameterPlaceholders(MULTI_TABLE_COMPARISON_WRITER_SQL,inputParameterValue),
List<BaseConfig> writerConfigList = RuleParserUtils.getWriterConfigList(
ParameterUtils.convertParameterPlaceholders(RuleManager.MULTI_TABLE_COMPARISON_WRITER_SQL,inputParameterValue),
context);
return new DataQualityConfiguration(
context.getRuleName(),
connectorParameterList,
writerParameterList,
executorParameterList);
RuleParserUtils.getEnvConfig(),
readerConfigList,
writerConfigList,
transformerConfigList);
}
}

43
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/SingleTableCustomSqlRuleParser.java

@ -21,12 +21,9 @@ import org.apache.dolphinscheduler.common.exception.DolphinException;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.utils.RuleParserUtils;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.RuleManager;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ConnectorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.BaseConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.DataQualityConfiguration;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ExecutorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.WriterParameter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@ -40,29 +37,33 @@ public class SingleTableCustomSqlRuleParser implements IRuleParser {
DataQualityTaskExecutionContext context) throws DolphinException {
int index = 1;
List<ConnectorParameter> connectorParameterList =
RuleParserUtils.getConnectorParameterList(inputParameterValue,context);
List<ExecutorParameter> executorParameterList = new ArrayList<>();
List<BaseConfig> readerConfigList =
RuleParserUtils.getReaderConfigList(inputParameterValue,context);
RuleParserUtils.addStatisticsValueTableReaderConfig(readerConfigList,context);
List<BaseConfig> transformerConfigList = RuleParserUtils
.getSingleTableCustomSqlTransformerConfigList(index,inputParameterValue);
//replace the placeholder in execute sql list
index = RuleParserUtils.replaceExecuteSqlPlaceholder(
context.getExecuteSqlList(),
index,
inputParameterValue,
executorParameterList);
context.getExecuteSqlList(),
index,
inputParameterValue,
transformerConfigList);
String writerSql = RuleManager.SINGLE_TABLE_CUSTOM_SQL_WRITER_SQL;
if (context.isCompareWithFixedValue()) {
writerSql = writerSql.replaceAll("join \\$\\{comparison_table}","");
}
List<WriterParameter> writerParameterList = RuleParserUtils.getWriterParameterList(
index,
inputParameterValue,
executorParameterList,
context,
RuleManager.SINGLE_TABLE_CUSTOM_SQL_WRITER_SQL
);
List<BaseConfig> writerConfigList = RuleParserUtils.getAllWriterConfigList(inputParameterValue,
context, index, transformerConfigList, writerSql,RuleManager.TASK_STATISTICS_VALUE_WRITER_SQL);
return new DataQualityConfiguration(
context.getRuleName(),
connectorParameterList,
writerParameterList,
executorParameterList);
RuleParserUtils.getEnvConfig(),
readerConfigList,
writerConfigList,
transformerConfigList);
}
}

35
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/task/dq/rule/parser/SingleTableRuleParser.java

@ -25,10 +25,8 @@ import org.apache.dolphinscheduler.dao.entity.DqRuleExecuteSql;
import org.apache.dolphinscheduler.server.entity.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.server.utils.RuleParserUtils;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.RuleManager;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ConnectorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.BaseConfig;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.DataQualityConfiguration;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.ExecutorParameter;
import org.apache.dolphinscheduler.server.worker.task.dq.rule.parameter.WriterParameter;
import java.util.ArrayList;
import java.util.List;
@ -48,29 +46,32 @@ public class SingleTableRuleParser implements IRuleParser {
int index = 1;
List<ConnectorParameter> connectorParameterList =
RuleParserUtils.getConnectorParameterList(inputParameterValue,context);
List<ExecutorParameter> executorParameterList = new ArrayList<>();
List<BaseConfig> readerConfigList =
RuleParserUtils.getReaderConfigList(inputParameterValue,context);
RuleParserUtils.addStatisticsValueTableReaderConfig(readerConfigList,context);
List<BaseConfig> transformerConfigList = new ArrayList<>();
//replace the placeholder in execute sql list
index = RuleParserUtils.replaceExecuteSqlPlaceholder(
context.getExecuteSqlList(),
index,
inputParameterValue,
executorParameterList);
transformerConfigList);
String writerSql = RuleManager.DEFAULT_COMPARISON_WRITER_SQL;
if (context.isCompareWithFixedValue()) {
writerSql = writerSql.replaceAll("full join \\$\\{comparison_table}","");
}
List<WriterParameter> writerParameterList = RuleParserUtils.getWriterParameterList(
index,
inputParameterValue,
executorParameterList,
context,
RuleManager.DEFAULT_COMPARISON_WRITER_SQL
);
List<BaseConfig> writerConfigList = RuleParserUtils.getAllWriterConfigList(inputParameterValue,
context, index, transformerConfigList, writerSql,RuleManager.TASK_STATISTICS_VALUE_WRITER_SQL);
return new DataQualityConfiguration(
context.getRuleName(),
connectorParameterList,
writerParameterList,
executorParameterList);
RuleParserUtils.getEnvConfig(),
readerConfigList,
writerConfigList,
transformerConfigList);
}
}

23
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/registry/DependencyConfig.java

@ -18,15 +18,18 @@
package org.apache.dolphinscheduler.server.registry;
import org.apache.dolphinscheduler.dao.AlertDao;
import org.apache.dolphinscheduler.dao.datasource.SpringConnectionFactory;
import org.apache.dolphinscheduler.dao.mapper.AlertGroupMapper;
import org.apache.dolphinscheduler.dao.mapper.AlertMapper;
import org.apache.dolphinscheduler.dao.mapper.AlertPluginInstanceMapper;
import org.apache.dolphinscheduler.dao.mapper.CommandMapper;
import org.apache.dolphinscheduler.dao.mapper.DataSourceMapper;
import org.apache.dolphinscheduler.dao.mapper.DqComparisonTypeMapper;
import org.apache.dolphinscheduler.dao.mapper.DqExecuteResultMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleExecuteSqlMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleInputEntryMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleMapper;
import org.apache.dolphinscheduler.dao.mapper.DqTaskStatisticsValueMapper;
import org.apache.dolphinscheduler.dao.mapper.ErrorCommandMapper;
import org.apache.dolphinscheduler.dao.mapper.PluginDefineMapper;
import org.apache.dolphinscheduler.dao.mapper.ProcessDefinitionMapper;
@ -44,6 +47,7 @@ import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.dispatch.host.HostManager;
import org.apache.dolphinscheduler.server.master.dispatch.host.RandomHostManager;
import org.apache.dolphinscheduler.server.master.processor.queue.TaskResponseService;
import org.apache.dolphinscheduler.server.utils.DataQualityResultOperator;
import org.apache.dolphinscheduler.server.worker.processor.TaskCallbackService;
import org.apache.dolphinscheduler.service.process.ProcessService;
import org.apache.dolphinscheduler.service.queue.TaskPriorityQueue;
@ -204,4 +208,23 @@ public class DependencyConfig {
return Mockito.mock(DqRuleExecuteSqlMapper.class);
}
@Bean
public DqComparisonTypeMapper dqComparisonTypeMapper() {
return Mockito.mock(DqComparisonTypeMapper.class);
}
@Bean
public DqTaskStatisticsValueMapper dqTaskStatisticsValueMapper() {
return Mockito.mock(DqTaskStatisticsValueMapper.class);
}
@Bean
public SpringConnectionFactory springConnectionFactory() {
return Mockito.mock(SpringConnectionFactory.class);
}
@Bean
public DataQualityResultOperator dataQualityResultOperator() {
return Mockito.mock(DataQualityResultOperator.class);
}
}

44
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/utils/JdbcUrlParserTest.java

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.utils;
import org.apache.dolphinscheduler.common.model.JdbcInfo;
import org.junit.Assert;
import org.junit.Test;
/**
* JdbcUrlParserTest
*/
public class JdbcUrlParserTest {

    /**
     * Parses a standard MySQL JDBC URL and verifies every extracted field
     * via JdbcInfo#toString.
     *
     * Fix: the original wrapped the assertion in {@code if (jdbcInfo != null)},
     * so a failed parse (null result) made the test pass silently. Assert
     * non-null explicitly so a parsing regression fails the test.
     */
    @Test
    public void testGetJdbcInfo() {
        JdbcInfo jdbcInfo =
                JdbcUrlParser.getJdbcInfo("jdbc:mysql://localhost:3306/dolphinscheduler?"
                        + "useUnicode=true&characterEncoding=UTF-8");
        Assert.assertNotNull("JdbcUrlParser returned null for a valid MySQL URL", jdbcInfo);
        String expected = "JdbcInfo{host='localhost', port='3306', "
                + "driverName='mysql', database='dolphinscheduler', "
                + "params='useUnicode=true&characterEncoding=UTF-8', "
                + "address='jdbc:mysql://localhost:3306'}";
        Assert.assertEquals(expected, jdbcInfo.toString());
    }
}

23
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackServiceTestConfig.java

@ -18,15 +18,18 @@
package org.apache.dolphinscheduler.server.worker.processor;
import org.apache.dolphinscheduler.dao.AlertDao;
import org.apache.dolphinscheduler.dao.datasource.SpringConnectionFactory;
import org.apache.dolphinscheduler.dao.mapper.AlertGroupMapper;
import org.apache.dolphinscheduler.dao.mapper.AlertMapper;
import org.apache.dolphinscheduler.dao.mapper.AlertPluginInstanceMapper;
import org.apache.dolphinscheduler.dao.mapper.CommandMapper;
import org.apache.dolphinscheduler.dao.mapper.DataSourceMapper;
import org.apache.dolphinscheduler.dao.mapper.DqComparisonTypeMapper;
import org.apache.dolphinscheduler.dao.mapper.DqExecuteResultMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleExecuteSqlMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleInputEntryMapper;
import org.apache.dolphinscheduler.dao.mapper.DqRuleMapper;
import org.apache.dolphinscheduler.dao.mapper.DqTaskStatisticsValueMapper;
import org.apache.dolphinscheduler.dao.mapper.ErrorCommandMapper;
import org.apache.dolphinscheduler.dao.mapper.PluginDefineMapper;
import org.apache.dolphinscheduler.dao.mapper.ProcessDefinitionMapper;
@ -40,6 +43,7 @@ import org.apache.dolphinscheduler.dao.mapper.TenantMapper;
import org.apache.dolphinscheduler.dao.mapper.UdfFuncMapper;
import org.apache.dolphinscheduler.dao.mapper.UserMapper;
import org.apache.dolphinscheduler.server.master.cache.impl.TaskInstanceCacheManagerImpl;
import org.apache.dolphinscheduler.server.utils.DataQualityResultOperator;
import org.apache.dolphinscheduler.service.process.ProcessService;
import org.mockito.Mockito;
@ -172,4 +176,23 @@ public class TaskCallbackServiceTestConfig {
return Mockito.mock(DqRuleExecuteSqlMapper.class);
}
@Bean
public DqComparisonTypeMapper dqComparisonTypeMapper() {
return Mockito.mock(DqComparisonTypeMapper.class);
}
@Bean
public DqTaskStatisticsValueMapper dqTaskStatisticsValueMapper() {
return Mockito.mock(DqTaskStatisticsValueMapper.class);
}
@Bean
public SpringConnectionFactory springConnectionFactory() {
return Mockito.mock(SpringConnectionFactory.class);
}
@Bean
public DataQualityResultOperator dataQualityResultOperator() {
return Mockito.mock(DataQualityResultOperator.class);
}
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save