You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.5 KiB
94 lines
3.5 KiB
# Licensed to the Apache Software Foundation (ASF) under one |
|
# or more contributor license agreements. See the NOTICE file |
|
# distributed with this work for additional information |
|
# regarding copyright ownership. The ASF licenses this file |
|
# to you under the Apache License, Version 2.0 (the |
|
# "License"); you may not use this file except in compliance |
|
# with the License. You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, |
|
# software distributed under the License is distributed on an |
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
|
# KIND, either express or implied. See the License for the |
|
# specific language governing permissions and limitations |
|
# under the License. |
|
|
|
""" |
|
A example workflow for task datax. |
|
|
|
This example will create a workflow named `task_datax`. |
|
`task_datax` is true workflow define and run task task_datax. |
|
You can create data sources `first_mysql` and `first_mysql` through UI. |
|
It creates a task to synchronize datax from the source database to the target database. |
|
""" |
|
|
|
|
|
import json

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX
|
|
|
# DataX job definition used by the ``CustomDataX`` task below: copy four
# columns of ``source_table`` in ``source_db`` into ``target_table`` in
# ``target_db`` on the same local MySQL server.
# NOTE: per the DataX job format, the reader's ``jdbcUrl`` is a list while
# the writer's is a single string.
_READER = {
    "name": "mysqlreader",
    "parameter": {
        "username": "usr",
        "password": "pwd",
        "column": ["id", "name", "code", "description"],
        "splitPk": "id",
        "connection": [
            {
                "table": ["source_table"],
                "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/source_db"],
            }
        ],
    },
}

_WRITER = {
    "name": "mysqlwriter",
    "parameter": {
        "writeMode": "insert",
        "username": "usr",
        "password": "pwd",
        "column": ["id", "name"],
        "connection": [
            {
                "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db",
                "table": ["target_table"],
            }
        ],
    },
}

# Key insertion order mirrors the original literal so serialized output
# (``str``/``json.dumps``) is byte-identical.
JSON_TEMPLATE = {
    "job": {
        "content": [{"reader": _READER, "writer": _WRITER}],
        "setting": {
            "errorLimit": {"percentage": 0, "record": 0},
            "speed": {"channel": 1, "record": 1000},
        },
    }
}
|
|
|
# Build and submit the workflow. ``tenant_exists`` must be a tenant that
# already exists in your DolphinScheduler instance.
with ProcessDefinition(
    name="task_datax_example",
    tenant="tenant_exists",
) as pd:
    # This task synchronizes the data in `t_ds_project`
    # of `first_mysql` database to `target_project` of `second_mysql` database.
    # You have to make sure data sources named `first_mysql` and `second_mysql`
    # exist in your environment.
    task1 = DataX(
        name="task_datax",
        datasource_name="first_mysql",
        datatarget_name="second_mysql",
        sql="select id, name, code, description from source_table",
        target_table="target_table",
    )

    # You can customize the DataX job definition to sync data. This task
    # creates a DataX job equivalent to task1, transferring records from
    # `first_mysql` to `second_mysql`.
    # FIX: serialize with json.dumps, not str() — str() yields a Python dict
    # repr (single quotes), which is not valid JSON and DataX cannot parse it.
    task2 = CustomDataX(name="task_custom_datax", json=json.dumps(JSON_TEMPLATE))

    pd.run()
|
|
|