Browse Source

[Fix-15760][datasource-plugin] fix sql task split error (#15760) (#15794)

* Fix the bug in SQL splitting by completing the task in two steps: 1. removeComment 2. split

* Add a unit test for Hive SQL splitting.
dev_wenjun_refactorMaster
songwenyong 8 months ago committed by GitHub
parent
commit
5d8808dda4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/datasource/AbstractDataSourceProcessor.java
  2. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-clickhouse/src/main/java/org/apache/dolphinscheduler/plugin/datasource/clickhouse/param/ClickHouseDataSourceProcessor.java
  3. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-dameng/src/main/java/org/apache/dolphinscheduler/plugin/datasource/dameng/param/DamengDataSourceProcessor.java
  4. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-db2/src/main/java/org/apache/dolphinscheduler/plugin/datasource/db2/param/Db2DataSourceProcessor.java
  5. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-hive/src/main/java/org/apache/dolphinscheduler/plugin/datasource/hive/param/HiveDataSourceProcessor.java
  6. 20
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-hive/src/test/java/org/apache/dolphinscheduler/plugin/datasource/hive/param/HiveDataSourceProcessorTest.java
  7. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java
  8. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-oceanbase/src/main/java/org/apache/dolphinscheduler/plugin/datasource/oceanbase/param/OceanBaseDataSourceProcessor.java
  9. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-postgresql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/postgresql/param/PostgreSQLDataSourceProcessor.java
  10. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-sqlserver/src/main/java/org/apache/dolphinscheduler/plugin/datasource/sqlserver/param/SQLServerDataSourceProcessor.java
  11. 3
      dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-trino/src/main/java/org/apache/dolphinscheduler/plugin/datasource/trino/param/TrinoDataSourceProcessor.java

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/datasource/AbstractDataSourceProcessor.java

@ -134,6 +134,7 @@ public abstract class AbstractDataSourceProcessor implements DataSourceProcessor
/**
 * Removes comments from the given SQL text and then splits it into individual statements.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.other;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
}

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-clickhouse/src/main/java/org/apache/dolphinscheduler/plugin/datasource/clickhouse/param/ClickHouseDataSourceProcessor.java

@ -129,7 +129,8 @@ public class ClickHouseDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the ClickHouse dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.clickhouse;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-dameng/src/main/java/org/apache/dolphinscheduler/plugin/datasource/dameng/param/DamengDataSourceProcessor.java

@ -135,7 +135,8 @@ public class DamengDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the Dameng ({@code dm}) dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.dm;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> paramMap) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-db2/src/main/java/org/apache/dolphinscheduler/plugin/datasource/db2/param/Db2DataSourceProcessor.java

@ -129,7 +129,8 @@ public class Db2DataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the DB2 dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.db2;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-hive/src/main/java/org/apache/dolphinscheduler/plugin/datasource/hive/param/HiveDataSourceProcessor.java

@ -152,7 +152,8 @@ public class HiveDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the Hive dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.hive;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

20
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-hive/src/test/java/org/apache/dolphinscheduler/plugin/datasource/hive/param/HiveDataSourceProcessorTest.java

@ -23,6 +23,7 @@ import org.apache.dolphinscheduler.plugin.datasource.api.utils.PasswordUtils;
import org.apache.dolphinscheduler.spi.enums.DbType;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
@ -94,4 +95,23 @@ public class HiveDataSourceProcessorTest {
Assertions.assertEquals(DataSourceConstants.HIVE_VALIDATION_QUERY,
hiveDatasourceProcessor.getValidationQuery());
}
/**
 * Checks that a Hive script made of a {@code create table} statement and an
 * {@code insert overwrite} statement, with {@code --} comments placed inline, at end of
 * line and on a line of their own, is split into exactly two statements.
 */
@Test
void splitAndRemoveComment() {
    String sql = "create table if not exists test_ods.tb_test(\n" +
            " `id` bigint COMMENT 'id', -- auto increment\n" +
            " `user_name` string COMMENT 'username',\n" +
            " `birthday` string COMMENT 'birthday',\n" +
            " `gender` int COMMENT '1 male 2 female'\n" +
            ") COMMENT 'user information table' PARTITIONED BY (`date_id` string);\n" +
            "\n" +
            "-- insert\n" +
            "insert\n" +
            " overwrite table test_ods.tb_test partition(date_id = '2024-03-28') -- partition\n" +
            "values\n" +
            " (1, 'Magic', '1990-10-01', '1');";
    List<String> statements = hiveDatasourceProcessor.splitAndRemoveComment(sql);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
    // failure report read "expected: <2> but was: <n>" rather than the reverse.
    Assertions.assertEquals(2, statements.size());
}
}

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java

@ -177,7 +177,8 @@ public class MySQLDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the MySQL dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.mysql;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private static boolean checkKeyIsLegitimate(String key) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-oceanbase/src/main/java/org/apache/dolphinscheduler/plugin/datasource/oceanbase/param/OceanBaseDataSourceProcessor.java

@ -192,6 +192,7 @@ public class OceanBaseDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the OceanBase dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.oceanbase;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
}

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-postgresql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/postgresql/param/PostgreSQLDataSourceProcessor.java

@ -131,7 +131,8 @@ public class PostgreSQLDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the PostgreSQL dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.postgresql;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-sqlserver/src/main/java/org/apache/dolphinscheduler/plugin/datasource/sqlserver/param/SQLServerDataSourceProcessor.java

@ -128,7 +128,8 @@ public class SQLServerDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the SQL Server dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.sqlserver;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

3
dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-trino/src/main/java/org/apache/dolphinscheduler/plugin/datasource/trino/param/TrinoDataSourceProcessor.java

@ -131,7 +131,8 @@ public class TrinoDataSourceProcessor extends AbstractDataSourceProcessor {
/**
 * Removes comments from the given SQL text and then splits it into individual statements,
 * using the Trino dialect.
 *
 * <p>Comment removal and splitting are run as two separate passes ({@code removeComment}
 * followed by {@code split}) instead of the single combined
 * {@code SQLParserUtils.splitAndRemoveComment} call.
 *
 * @param sql the raw SQL text, which may hold several statements and comments
 * @return the statements returned by {@code SQLParserUtils.split} for the comment-free text
 */
@Override
public List<String> splitAndRemoveComment(String sql) {
    // Fully qualified name; presumably avoids a clash with the project's own DbType enum - confirm.
    final com.alibaba.druid.DbType dbType = com.alibaba.druid.DbType.trino;
    // Pass 1: strip comments so they cannot interfere with statement splitting.
    final String cleanSQL = SQLParserUtils.removeComment(sql, dbType);
    // Pass 2: split the cleaned text into statements.
    return SQLParserUtils.split(cleanSQL, dbType);
}
private String transformOther(Map<String, String> otherMap) {

Loading…
Cancel
Save