From 8a8b63cd96ad0301322e25936f305ff595f317dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=AE=B6=E5=90=8D?= <13774486042@163.com> Date: Mon, 25 Apr 2022 10:17:20 +0800 Subject: [PATCH] [Improve][python] Support create table syntax and custom sql type param (#9673) --- .../src/pydolphinscheduler/tasks/sql.py | 24 +++++++++- .../tests/tasks/test_sql.py | 46 +++++++++++++------ 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py index b5be3e45aa..c15170c448 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py @@ -17,6 +17,7 @@ """Task sql.""" +import logging import re from typing import Dict, Optional @@ -24,6 +25,8 @@ from pydolphinscheduler.constants import TaskType from pydolphinscheduler.core.database import Database from pydolphinscheduler.core.task import Task +log = logging.getLogger(__file__) + class SqlType: """SQL type, for now it just contain `SELECT` and `NO_SELECT`.""" @@ -61,6 +64,7 @@ class Sql(Task): name: str, datasource_name: str, sql: str, + sql_type: Optional[int] = None, pre_statements: Optional[str] = None, post_statements: Optional[str] = None, display_rows: Optional[int] = 10, @@ -69,6 +73,7 @@ class Sql(Task): ): super().__init__(name, TaskType.SQL, *args, **kwargs) self.sql = sql + self.param_sql_type = sql_type self.datasource_name = datasource_name self.pre_statements = pre_statements or [] self.post_statements = post_statements or [] @@ -76,9 +81,24 @@ class Sql(Task): @property def sql_type(self) -> int: - """Judgement sql type, use regexp to check which type of the sql is.""" + """Judgement sql type, it will return the SQL type for type `SELECT` or `NOT_SELECT`. + + If `param_sql_type` dot not specific, will use regexp to check + which type of the SQL is. But if `param_sql_type` is specific + will use the parameter overwrites the regexp way + """ + if ( + self.param_sql_type == SqlType.SELECT + or self.param_sql_type == SqlType.NOT_SELECT + ): + log.info( + "The sql type is specified by a parameter, with value %s", + self.param_sql_type, + ) + return self.param_sql_type pattern_select_str = ( - "^(?!(.* |)insert |(.* |)delete |(.* |)drop |(.* |)update |(.* |)alter ).*" + "^(?!(.* |)insert |(.* |)delete |(.* |)drop " + "|(.* |)update |(.* |)alter |(.* |)create ).*" ) pattern_select = re.compile(pattern_select_str, re.IGNORECASE) if pattern_select.match(self.sql) is None: diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py index 3f8209c23e..74ac0f67ea 100644 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py +++ b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py @@ -26,24 +26,38 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType @pytest.mark.parametrize( - "sql, sql_type", + "sql, param_sql_type, sql_type", [ - ("select 1", SqlType.SELECT), - (" select 1", SqlType.SELECT), - (" select 1 ", SqlType.SELECT), - (" select 'insert' ", SqlType.SELECT), - (" select 'insert ' ", SqlType.SELECT), - ("with tmp as (select 1) select * from tmp ", SqlType.SELECT), - ("insert into table_name(col1, col2) value (val1, val2)", SqlType.NOT_SELECT), + ("select 1", None, SqlType.SELECT), + (" select 1", None, SqlType.SELECT), + (" select 1 ", None, SqlType.SELECT), + (" select 'insert' ", None, SqlType.SELECT), + (" select 'insert ' ", None, SqlType.SELECT), + ("with tmp as (select 1) select * from tmp ", None, SqlType.SELECT), + ( + "insert into table_name(col1, col2) value (val1, val2)", + None, + SqlType.NOT_SELECT, + ), ( "insert into table_name(select, col2) value ('select', val2)", + None, + SqlType.NOT_SELECT, + ), + ("update table_name SET col1=val1 where col1=val2", None, SqlType.NOT_SELECT), + ( + "update table_name SET col1='select' where col1=val2", + None, SqlType.NOT_SELECT, ), - ("update table_name SET col1=val1 where col1=val2", SqlType.NOT_SELECT), - ("update table_name SET col1='select' where col1=val2", SqlType.NOT_SELECT), - ("delete from table_name where id < 10", SqlType.NOT_SELECT), - ("delete from table_name where id < 10", SqlType.NOT_SELECT), - ("alter table table_name add column col1 int", SqlType.NOT_SELECT), + ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), + ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), + ("alter table table_name add column col1 int", None, SqlType.NOT_SELECT), + ("create table table_name2 (col1 int)", None, SqlType.NOT_SELECT), + ("create table table_name2 (col1 int)", SqlType.SELECT, SqlType.SELECT), + ("select 1", SqlType.NOT_SELECT, SqlType.NOT_SELECT), + ("create table table_name2 (col1 int)", SqlType.NOT_SELECT, SqlType.NOT_SELECT), + ("select 1", SqlType.SELECT, SqlType.SELECT), ], ) @patch( @@ -54,11 +68,13 @@ from pydolphinscheduler.tasks.sql import Sql, SqlType "pydolphinscheduler.core.database.Database.get_database_info", return_value=({"id": 1, "type": "mock_type"}), ) -def test_get_sql_type(mock_datasource, mock_code_version, sql, sql_type): +def test_get_sql_type( + mock_datasource, mock_code_version, sql, param_sql_type, sql_type +): """Test property sql_type could return correct type.""" name = "test_get_sql_type" datasource_name = "test_datasource" - task = Sql(name, datasource_name, sql) + task = Sql(name, datasource_name, sql, sql_type=param_sql_type) assert ( sql_type == task.sql_type ), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}"