From 8efaa9fa1f1a5ab70dad0137df13da6480be59e9 Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Mon, 5 Feb 2024 09:54:06 +0800 Subject: [PATCH] fix: data quality can not use (#15551) * fix: data quality can not use fix: #15468, #15249, #14858 --------- Co-authored-by: Rick Cheng Co-authored-by: Eric Gao --- docs/docs/en/guide/data-quality.md | 10 +-- docs/docs/en/guide/resource/configuration.md | 5 +- docs/docs/en/guide/upgrade/incompatible.md | 1 + docs/docs/zh/guide/data-quality.md | 9 +-- docs/docs/zh/guide/resource/configuration.md | 5 +- .../docker/file-manage/common.properties | 5 +- .../common/constants/DataSourceConstants.java | 2 +- .../src/main/resources/common.properties | 5 +- .../log/SensitiveDataConverterTest.java | 28 +++++++ .../src/test/resources/common.properties | 5 +- .../datasource/api/utils/CommonUtils.java | 61 ++++++++++++++-- .../mysql/param/MySQLDataSourceProcessor.java | 14 ++++ .../docker/file-manage/common.properties | 5 +- .../runner/TaskExecutionContextFactory.java | 27 +++---- .../datasource/DefaultConnectionParam.java | 36 +++++++++ .../plugin/task/api/model/JdbcInfo.java | 73 ++++--------------- .../plugin/task/api/utils/JdbcUrlParser.java | 24 +++++- .../task/api/utils/JdbcUrlParserTest.java | 18 ++++- .../src/test/resources/common.properties | 5 +- .../plugin/task/dq/DataQualityTask.java | 6 +- 20 files changed, 221 insertions(+), 123 deletions(-) create mode 100644 dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java diff --git a/docs/docs/en/guide/data-quality.md b/docs/docs/en/guide/data-quality.md index 2ed49b5f86..f6aa7a06b2 100644 --- a/docs/docs/en/guide/data-quality.md +++ b/docs/docs/en/guide/data-quality.md @@ -12,15 +12,7 @@ The execution logic of the data quality task is as follows: - The current data quality task result is stored in the `t_ds_dq_execute_result` table of `dolphinscheduler` `Worker` sends the task result to `Master`, after `Master` receives `TaskResponse`, it will judge whether the task type is `DataQualityTask`, if so, it will read the corresponding result from `t_ds_dq_execute_result` according to `taskInstanceId`, and then The result is judged according to the check mode, operator and threshold configured by the user. - If the result is a failure, the corresponding operation, alarm or interruption will be performed according to the failure policy configured by the user. -- Add config : `/conf/common.properties` - -```properties -# Change to specific version if you not use dev branch -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar -``` - -- Please fill in `data-quality.jar.name` according to the actual package name. -- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.name`. +- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.name` in `common.properties` with attribute name `data-quality.jar.name` - If the old version is upgraded and used, you need to execute the `sql` update script to initialize the database before running. - `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` was built with no dependencies. If a `JDBC` driver is required, you can set the `-jars` parameter in the `node settings` `Option Parameters`, e.g. `--jars /lib/jars/mysql-connector-java-8.0.16.jar`. - Currently only `MySQL`, `PostgreSQL` and `HIVE` data sources have been tested, other data sources have not been tested yet. diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 112b5458cc..42e1925e89 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -152,8 +152,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/docs/docs/en/guide/upgrade/incompatible.md b/docs/docs/en/guide/upgrade/incompatible.md index 0ecd94c8cb..4580a7d13d 100644 --- a/docs/docs/en/guide/upgrade/incompatible.md +++ b/docs/docs/en/guide/upgrade/incompatible.md @@ -10,6 +10,7 @@ This document records the incompatible updates between each version. You need to * Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)). * Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)). * Remove `deleteSource` in `download()` of `StorageOperate` ([#14084](https://github.com/apache/dolphinscheduler/pull/14084)) +* Remove default key for attribute `data-quality.jar.name` in `common.properties` ([#15551](https://github.com/apache/dolphinscheduler/pull/15551)) ## 3.2.0 diff --git a/docs/docs/zh/guide/data-quality.md b/docs/docs/zh/guide/data-quality.md index 95ca5e2d68..2a098a3216 100644 --- a/docs/docs/zh/guide/data-quality.md +++ b/docs/docs/zh/guide/data-quality.md @@ -13,14 +13,7 @@ > ## 注意事项 -添加配置信息:`/conf/common.properties` - -```properties -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar -``` - -- 这里的`data-quality.jar.name`请根据实际打包的名称来填写。 -- 如果单独打包`data-quality`的话,记得修改包名和`data-quality.jar.name`一致。 +- 如果单独打包`data-quality`的话,记得修改包名和`data-quality.jar.name`一致,配置内容在 `common.properties` 中的 `data-quality.jar.name` - 如果是老版本升级使用,运行之前需要先执行`SQL`更新脚本进行数据库初始化。 - 当前 `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` 是瘦包,不包含任何 `JDBC` 驱动。 如果有 `JDBC` 驱动需要,可以在`节点设置` `选项参数`处设置 `--jars` 参数, diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index c5c7f85620..57d0935e09 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -156,8 +156,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index 000341f153..d43e55e822 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -84,8 +84,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java index 568eb8c6f5..11347942bd 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java @@ -102,7 +102,7 @@ public class DataSourceConstants { * dataSource sensitive param */ public static final String DATASOURCE_PASSWORD_REGEX = - "(?<=((?i)password((\":\")|(\\\\\":\\\\\")|(=')))).*?(?=((\")|(\\\\\")|(')))"; + "(?<=((?i)password((\" : \")|(\":\")|(\\\\\":\\\\\")|(=')))).*?(?=((\")|(\\\\\")|(')))"; /** * datasource encryption salt diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 28ebf4571d..451a0f734c 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -120,8 +120,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java index c641c296b8..e6078ae95f 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java +++ b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java @@ -79,6 +79,34 @@ public class SensitiveDataConverterTest { " }\n" + "}"); + // data quality + tcs.put("\"readers\" : [ {\n" + + " \"type\" : \"JDBC\",\n" + + " \"config\" : {\n" + + " \"database\" : \"dolphinscheduler\",\n" + + " \"password\" : \"view1\",\n" + + " \"driver\" : \"com.mysql.cj.jdbc.Driver\",\n" + + " \"user\" : \"root\",\n" + + " \"output_table\" : \"dolphinscheduler_users\",\n" + + " \"table\" : \"users\",\n" + + " \"url\" : \"jdbc:mysql://127.0.0.1:3307/dolphinscheduler?userSSL=true&enabledTLSProtocols=TLSv1.2\"\n" + + + " }\n" + + " } ]", + "\"readers\" : [ {\n" + + " \"type\" : \"JDBC\",\n" + + " \"config\" : {\n" + + " \"database\" : \"dolphinscheduler\",\n" + + " \"password\" : \"*****\",\n" + + " \"driver\" : \"com.mysql.cj.jdbc.Driver\",\n" + + " \"user\" : \"root\",\n" + + " \"output_table\" : \"dolphinscheduler_users\",\n" + + " \"table\" : \"users\",\n" + + " \"url\" : \"jdbc:mysql://127.0.0.1:3307/dolphinscheduler?userSSL=true&enabledTLSProtocols=TLSv1.2\"\n" + + + " }\n" + + " } ]"); + for (String logMsg : tcs.keySet()) { String maskedLog = SensitiveDataConverter.maskSensitiveData(logMsg); logger.info("original parameter : {}", logMsg); diff --git a/dolphinscheduler-common/src/test/resources/common.properties b/dolphinscheduler-common/src/test/resources/common.properties index ef6cc3710e..107977df7f 100644 --- a/dolphinscheduler-common/src/test/resources/common.properties +++ b/dolphinscheduler-common/src/test/resources/common.properties @@ -115,8 +115,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java index e6ecef287a..a4e64594c0 100644 --- a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java +++ b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java @@ -36,17 +36,26 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import java.io.File; import java.io.IOException; +import java.util.Optional; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.core.io.ClassPathResource; /** * common utils */ +@Slf4j public class CommonUtils { private CommonUtils() { throw new UnsupportedOperationException("Construct CommonUtils"); } + private static String DEFAULT_DATA_QUALITY_JAR_PATH = null; + private static final boolean IS_DEVELOP_MODE = PropertyUtils.getBoolean(Constants.DEVELOPMENT_STATE, true); /** @@ -123,14 +132,56 @@ public class CommonUtils { return false; } - public static String getDataQualityJarName() { - String dqsJarName = PropertyUtils.getString(DATA_QUALITY_JAR_NAME); + public static String getDataQualityJarPath() { + String dqsJarPath = PropertyUtils.getString(DATA_QUALITY_JAR_NAME); - if (StringUtils.isEmpty(dqsJarName)) { - return "dolphinscheduler-data-quality.jar"; + if (StringUtils.isEmpty(dqsJarPath)) { + log.info("data quality jar path is empty, will try to get it from data quality jar name"); + return getDefaultDataQualityJarPath(); } - return dqsJarName; + return dqsJarPath; + } + + private static String getDefaultDataQualityJarPath() { + if (StringUtils.isNotEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + return DEFAULT_DATA_QUALITY_JAR_PATH; + } + try { + // not standalone mode + String currentAbsolutePath = new ClassPathResource("./").getFile().getAbsolutePath(); + String currentLibPath = currentAbsolutePath + "/../libs"; + getDataQualityJarPathFromPath(currentLibPath).ifPresent(jarName -> DEFAULT_DATA_QUALITY_JAR_PATH = jarName); + + // standalone mode + if (StringUtils.isEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + log.info( + "Can not get data quality jar from path {}, maybe service running in standalone mode, will try to find another path", + currentLibPath); + currentLibPath = currentAbsolutePath + "/../../worker-server/libs"; + getDataQualityJarPathFromPath(currentLibPath) + .ifPresent(jarName -> DEFAULT_DATA_QUALITY_JAR_PATH = jarName); + } + } catch (IOException e) { + throw new RuntimeException("get default data quality jar path error", e); + } + log.info("get default data quality jar name: {}", DEFAULT_DATA_QUALITY_JAR_PATH); + return DEFAULT_DATA_QUALITY_JAR_PATH; + } + + private static Optional getDataQualityJarPathFromPath(String path) { + log.info("Try to get data quality jar from path {}", path); + File[] jars = new File(path).listFiles(); + if (jars == null) { + log.warn("No data quality related jar found from path {}", path); + return Optional.empty(); + } + for (File jar : jars) { + if (jar.getName().startsWith("dolphinscheduler-data-quality")) { + return Optional.of(jar.getAbsolutePath()); + } + } + return Optional.empty(); } /** diff --git a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java index c1b91e5930..b954defdd1 100644 --- a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java +++ b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java @@ -33,6 +33,7 @@ import org.apache.commons.collections4.MapUtils; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties; @@ -115,6 +116,10 @@ public class MySQLDataSourceProcessor extends AbstractDataSourceProcessor { @Override public String getJdbcUrl(ConnectionParam connectionParam) { MySQLConnectionParam mysqlConnectionParam = (MySQLConnectionParam) connectionParam; + if (MapUtils.isNotEmpty(mysqlConnectionParam.getOther())) { + return String.format("%s?%s", mysqlConnectionParam.getJdbcUrl(), + transformOther(mysqlConnectionParam.getOther())); + } return mysqlConnectionParam.getJdbcUrl(); } @@ -182,4 +187,13 @@ public class MySQLDataSourceProcessor extends AbstractDataSourceProcessor { && !key.contains(ALLOW_URL_IN_LOCAL_IN_FILE_NAME); } + private String transformOther(Map otherMap) { + if (MapUtils.isNotEmpty(otherMap)) { + List list = new ArrayList<>(otherMap.size()); + otherMap.forEach((key, value) -> list.add(String.format("%s=%s", key, value))); + return String.join("&", list); + } + return null; + } + } diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index f704bb60fc..b5f61011b3 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -95,8 +95,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality option, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java index d436004719..ab1806ff67 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java @@ -17,13 +17,6 @@ package org.apache.dolphinscheduler.server.master.runner; -import static org.apache.dolphinscheduler.common.constants.Constants.ADDRESS; -import static org.apache.dolphinscheduler.common.constants.Constants.DATABASE; -import static org.apache.dolphinscheduler.common.constants.Constants.JDBC_URL; -import static org.apache.dolphinscheduler.common.constants.Constants.OTHER; -import static org.apache.dolphinscheduler.common.constants.Constants.PASSWORD; -import static org.apache.dolphinscheduler.common.constants.Constants.SINGLE_SLASH; -import static org.apache.dolphinscheduler.common.constants.Constants.USER; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.CLUSTER; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.NAMESPACE_NAME; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.TASK_TYPE_DATA_QUALITY; @@ -70,6 +63,8 @@ import org.apache.dolphinscheduler.server.master.config.MasterConfig; import org.apache.dolphinscheduler.server.master.exception.TaskExecutionContextCreateException; import org.apache.dolphinscheduler.service.expand.CuringParamsService; import org.apache.dolphinscheduler.service.process.ProcessService; +import org.apache.dolphinscheduler.spi.datasource.BaseConnectionParam; +import org.apache.dolphinscheduler.spi.datasource.DefaultConnectionParam; import org.apache.dolphinscheduler.spi.enums.DbType; import org.apache.commons.collections4.CollectionUtils; @@ -80,7 +75,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.Properties; import lombok.extern.slf4j.Slf4j; @@ -400,15 +394,16 @@ public class TaskExecutionContextFactory { dataSource.setUserName(hikariDataSource.getUsername()); JdbcInfo jdbcInfo = JdbcUrlParser.getJdbcInfo(hikariDataSource.getJdbcUrl()); if (jdbcInfo != null) { - Properties properties = new Properties(); - properties.setProperty(USER, hikariDataSource.getUsername()); - properties.setProperty(PASSWORD, hikariDataSource.getPassword()); - properties.setProperty(DATABASE, jdbcInfo.getDatabase()); - properties.setProperty(ADDRESS, jdbcInfo.getAddress()); - properties.setProperty(OTHER, jdbcInfo.getParams()); - properties.setProperty(JDBC_URL, jdbcInfo.getAddress() + SINGLE_SLASH + jdbcInfo.getDatabase()); + // + BaseConnectionParam baseConnectionParam = new DefaultConnectionParam(); + baseConnectionParam.setUser(hikariDataSource.getUsername()); + baseConnectionParam.setPassword(hikariDataSource.getPassword()); + baseConnectionParam.setDatabase(jdbcInfo.getDatabase()); + baseConnectionParam.setAddress(jdbcInfo.getAddress()); + baseConnectionParam.setJdbcUrl(jdbcInfo.getJdbcUrl()); + baseConnectionParam.setOther(jdbcInfo.getParams()); dataSource.setType(DbType.of(JdbcUrlParser.getDbType(jdbcInfo.getDriverName()).getCode())); - dataSource.setConnectionParams(JSONUtils.toJsonString(properties)); + dataSource.setConnectionParams(JSONUtils.toJsonString(baseConnectionParam)); } return dataSource; diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java new file mode 100644 index 0000000000..a681ca6230 --- /dev/null +++ b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.spi.datasource; + +public class DefaultConnectionParam extends BaseConnectionParam { + + @Override + public String toString() { + return "DefaultConnectionParam{" + + "user='" + user + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", database='" + database + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", driverLocation='" + driverLocation + '\'' + + ", driverClassName='" + driverClassName + '\'' + + ", validationQuery='" + validationQuery + '\'' + + ", other='" + other + '\'' + + '}'; + } +} diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java index 3e8f47ba7b..5e90dffbfe 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java @@ -17,9 +17,20 @@ package org.apache.dolphinscheduler.plugin.task.api.model; +import java.util.Map; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + /** * JdbcInfo */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor public class JdbcInfo { private String host; @@ -30,67 +41,9 @@ public class JdbcInfo { private String database; - private String params; + private Map params; private String address; - public String getHost() { - return host; - } - - public void setHost(String host) { - this.host = host; - } - - public String getPort() { - return port; - } - - public void setPort(String port) { - this.port = port; - } - - public String getDriverName() { - return driverName; - } - - public void setDriverName(String driverName) { - this.driverName = driverName; - } - - public String getDatabase() { - return database; - } - - public void setDatabase(String database) { - this.database = database; - } - - public String getParams() { - return params; - } - - public void setParams(String params) { - this.params = params; - } - - public String getAddress() { - return address; - } - - public void setAddress(String address) { - this.address = address; - } - - @Override - public String toString() { - return "JdbcInfo{" - + "host='" + host + '\'' - + ", port='" + port + '\'' - + ", driverName='" + driverName + '\'' - + ", database='" + database + '\'' - + ", params='" + params + '\'' - + ", address='" + address + '\'' - + '}'; - } + private String jdbcUrl; } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java index 2ab0d69e23..e8e5ec299c 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java @@ -19,6 +19,7 @@ package org.apache.dolphinscheduler.plugin.task.api.utils; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.COLON; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.DOUBLE_SLASH; +import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EQUAL_SIGN; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.QUESTION; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.SEMICOLON; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.SINGLE_SLASH; @@ -30,6 +31,9 @@ import org.apache.dolphinscheduler.spi.enums.DbType; import org.apache.commons.lang3.StringUtils; +import java.util.HashMap; +import java.util.Map; + /** * JdbcUrlParser */ @@ -105,8 +109,24 @@ public class JdbcUrlParser { jdbcInfo.setHost(host); jdbcInfo.setPort(port); jdbcInfo.setDatabase(database); - jdbcInfo.setParams(params); - jdbcInfo.setAddress("jdbc:" + driverName + "://" + host + COLON + port); + + if (StringUtils.isNotEmpty(params)) { + Map others = new HashMap<>(); + String[] paramList = params.split("&"); + for (String param : paramList) { + // handle bad params + if (StringUtils.isEmpty(param) || !param.contains(EQUAL_SIGN)) { + continue; + } + String[] kv = param.split(EQUAL_SIGN); + others.put(kv[0], kv[1]); + } + jdbcInfo.setParams(others); + } + + String address = "jdbc:" + driverName + "://" + host + COLON + port; + jdbcInfo.setAddress(address); + jdbcInfo.setJdbcUrl(address + SINGLE_SLASH + database); return jdbcInfo; } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java index 9cef208187..bad9171bac 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java @@ -34,10 +34,20 @@ public class JdbcUrlParserTest { + "useUnicode=true&characterEncoding=UTF-8"); if (jdbcInfo != null) { String jdbcInfoStr = jdbcInfo.toString(); - String expected = "JdbcInfo{host='localhost', port='3306', " - + "driverName='mysql', database='dolphinscheduler', " - + "params='useUnicode=true&characterEncoding=UTF-8', " - + "address='jdbc:mysql://localhost:3306'}"; + String expected = + "JdbcInfo(host=localhost, port=3306, driverName=mysql, database=dolphinscheduler, " + + "params={useUnicode=true, characterEncoding=UTF-8}, address=jdbc:mysql://localhost:3306, jdbcUrl=jdbc:mysql://localhost:3306/dolphinscheduler)"; + Assertions.assertEquals(expected, jdbcInfoStr); + } + + // bad jdbc url case + jdbcInfo = JdbcUrlParser.getJdbcInfo("jdbc:mysql://localhost:3306/dolphinscheduler?" + + "useUnicode=true&&characterEncoding=UTF-8"); + if (jdbcInfo != null) { + String jdbcInfoStr = jdbcInfo.toString(); + String expected = + "JdbcInfo(host=localhost, port=3306, driverName=mysql, database=dolphinscheduler, " + + "params={useUnicode=true, characterEncoding=UTF-8}, address=jdbc:mysql://localhost:3306, jdbcUrl=jdbc:mysql://localhost:3306/dolphinscheduler)"; Assertions.assertEquals(expected, jdbcInfoStr); } } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties index 5fab54a143..9855d855e9 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties @@ -84,8 +84,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java index 1bf1a6454c..ec8adc3eef 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java @@ -50,7 +50,6 @@ import org.apache.dolphinscheduler.plugin.task.dq.utils.SparkArgsUtils; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; -import java.io.File; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -117,6 +116,7 @@ public class DataQualityTask extends AbstractYarnTask { DataQualityConfiguration dataQualityConfiguration = ruleManager.generateDataQualityParameter(); + log.info("data quality configuration: {}", JSONUtils.toPrettyJsonString(dataQualityConfiguration)); dataQualityParameters .getSparkParameters() .setMainArgs("\"" @@ -177,9 +177,7 @@ public class DataQualityTask extends AbstractYarnTask { protected void setMainJarName() { ResourceInfo mainJar = new ResourceInfo(); - String basePath = System.getProperty("user.dir").replace(File.separator + "bin", ""); - mainJar.setResourceName( - basePath + File.separator + "libs" + File.separator + CommonUtils.getDataQualityJarName()); + mainJar.setResourceName(CommonUtils.getDataQualityJarPath()); dataQualityParameters.getSparkParameters().setMainJar(mainJar); }