From f3d663a7eaf559e855971e83d593a827196a1aa0 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Fri, 28 Jan 2022 10:12:00 +0800 Subject: [PATCH] [python] Clean deps and prepare release (#8210) * Change package name * Migrate requirement*.txt to setup.py * Add extra required for dev * Add doc RELEASE and DEVELOP * Correct description --- .github/workflows/py-ci.yml | 11 +- .../pydolphinscheduler/DEVELOP.md | 120 +++++++++++++++++ .../pydolphinscheduler/README.md | 123 +++--------------- .../pydolphinscheduler/RELEASE.md | 35 +++++ .../pydolphinscheduler/ROADMAP.md | 34 ----- .../pydolphinscheduler/requirements.txt | 18 --- .../pydolphinscheduler/requirements_dev.txt | 30 ----- .../pydolphinscheduler/setup.py | 60 +++++++-- .../src/pydolphinscheduler/__init__.py | 4 + 9 files changed, 231 insertions(+), 204 deletions(-) create mode 100644 dolphinscheduler-python/pydolphinscheduler/DEVELOP.md create mode 100644 dolphinscheduler-python/pydolphinscheduler/RELEASE.md delete mode 100644 dolphinscheduler-python/pydolphinscheduler/ROADMAP.md delete mode 100644 dolphinscheduler-python/pydolphinscheduler/requirements.txt delete mode 100644 dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt diff --git a/.github/workflows/py-ci.yml b/.github/workflows/py-ci.yml index 60372134b6..db430caea9 100644 --- a/.github/workflows/py-ci.yml +++ b/.github/workflows/py-ci.yml @@ -49,7 +49,7 @@ jobs: with: python-version: 3.7 - name: Install Development Dependences - run: pip install -r requirements_dev.txt + run: pip install -e .[style] - name: Run Isort Checking run: isort --check . - name: Run Black Checking @@ -75,8 +75,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies & pydolphinscheduler run: | - pip install -r requirements.txt -r requirements_dev.txt - pip install -e . 
+ pip install -e .[test] - name: Run tests run: | pytest @@ -93,8 +92,7 @@ jobs: python-version: 3.7 - name: Install Development Dependences run: | - pip install -r requirements_dev.txt - pip install -e . + pip install -e .[test] - name: Run Tests && Check coverage run: coverage run && coverage report doc-build: @@ -109,8 +107,7 @@ jobs: python-version: 3.7 - name: Install Development Dependences run: | - pip install -r requirements_dev.txt - pip install -e . + pip install -e .[doc] - name: Test Build Document working-directory: dolphinscheduler-python/pydolphinscheduler/docs run: make clean && make html diff --git a/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md b/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md new file mode 100644 index 0000000000..f22ab8619b --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md @@ -0,0 +1,120 @@ + + +# Develop + +pydolphinscheduler is the Python API for Apache DolphinScheduler; it just defines what a workflow looks like instead of +storing or executing it. Here we use [py4j][py4j] to dynamically access the Java Virtual Machine. + +## Setup Develop Environment + +**PyDolphinScheduler** uses GitHub to hold all source code, so you should clone the code before you make any change. + +```shell +git clone git@github.com:apache/dolphinscheduler.git +``` + +Now, we should install all dependencies to make sure we can run tests or check code style locally + +```shell +cd dolphinscheduler/dolphinscheduler-python/pydolphinscheduler +pip install .[dev] +``` + +Next, we have to open the pydolphinscheduler project in your editor. We recommend you use [pycharm][pycharm] +instead of [IntelliJ IDEA][idea] to open it. And you could just open directory +`dolphinscheduler-python/pydolphinscheduler` instead of `dolphinscheduler-python`. + + +## Brief Concept + +Apache DolphinScheduler is designed to define workflows by UI, and pydolphinscheduler tries to define them by code.
When +defining by code, users usually do not care whether the user, tenant, or queue exists or not. All users care about is creating +a new workflow from the code they define. So we have some **side objects** in the `pydolphinscheduler/side` +directory; they only check whether an object exists, and create it if it does not. + +### Process Definition + +The pydolphinscheduler workflow object. Process definition is the same name as the Java object (it may be changed to +another word for simplicity). + +### Tasks + +The pydolphinscheduler task objects. We use tasks to define the exact job we want DolphinScheduler to do for us. For now, +we only support the `shell` task to execute shell tasks. [This link][all-task] lists all tasks supported in DolphinScheduler, +which will be implemented in the future. + +## Code Style + +We use [isort][isort] to automatically keep Python imports sorted alphabetically, and use [Black][black] as the code +formatter and [Flake8][flake8] as the pep8 checker. If you use [pycharm][pycharm] or [IntelliJ IDEA][idea], +maybe you could follow [Black-integration][black-editor] to configure them in your environment. + +Our Python API CI automatically runs the code style checker and unit tests when you submit a pull request in +GitHub; you could also run the static checks locally. + +```shell +# We recommend you run isort and Black before Flake8, because Black could auto fix some code style issues +# but Flake8 just hints when code style does not match pep8 + +# Run Isort +isort . + +# Run Black +black . + +# Run Flake8 +flake8 +``` + +## Testing + +pydolphinscheduler uses [pytest][pytest] to test our codebase. GitHub Action will run our tests when you create a +pull request or commit to the dev branch, with python version `3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. + +To test locally, you could directly run pytest after setting `PYTHONPATH` + +```shell +PYTHONPATH=src/ pytest +``` + +We try to keep pydolphinscheduler usable through unit test coverage.
90% test coverage is our target, but for +now, we require test coverage of at least 85%, and each pull request with less than 85% would fail our CI step +`Tests coverage`. We use [coverage][coverage] to check our test coverage, and you could check it locally by +running the command. + +```shell +coverage run && coverage report +``` + +It would not only run the unit tests but also show the coverage of each file whose cover rate is less than 100%, and the `TOTAL` +line shows you the total coverage of your code. If your CI failed on coverage, you could find the reason in +this command's output. + + +[py4j]: https://www.py4j.org/index.html +[pycharm]: https://www.jetbrains.com/pycharm +[idea]: https://www.jetbrains.com/idea/ +[all-task]: https://dolphinscheduler.apache.org/en-us/docs/dev/user_doc/guide/task/shell.html +[pytest]: https://docs.pytest.org/en/latest/ +[black]: https://black.readthedocs.io/en/stable/index.html +[flake8]: https://flake8.pycqa.org/en/latest/index.html +[black-editor]: https://black.readthedocs.io/en/stable/integrations/editors.html#pycharm-intellij-idea +[coverage]: https://coverage.readthedocs.io/en/stable/ +[isort]: https://pycqa.github.io/isort/index.html diff --git a/dolphinscheduler-python/pydolphinscheduler/README.md b/dolphinscheduler-python/pydolphinscheduler/README.md index b06632cc00..dfee43f2cc 100644 --- a/dolphinscheduler-python/pydolphinscheduler/README.md +++ b/dolphinscheduler-python/pydolphinscheduler/README.md @@ -23,27 +23,24 @@ [![Code style: black][black-shield]][black-gh] [![Imports: isort][isort-shield]][isort-gh] -pydolphinscheduler is python API for Apache DolphinScheduler, which allow you definition +**PyDolphinScheduler** is the Python API for Apache DolphinScheduler, which allows you to define your workflow by python code, aka workflow-as-codes.
## Quick Start -> **_Notice:_** For now, due to pydolphinscheduler without release to any binary tarball or [PyPI][pypi], you -> have to clone Apache DolphinScheduler code from GitHub to ensure quick start setup - -Here we show you how to install and run a simple example of pydolphinscheduler - -### Prepare +### Installation ```shell -# Clone code from github -git clone git@github.com:apache/dolphinscheduler.git +# Install +$ pip install apache-dolphinscheduler -# Install pydolphinscheduler from source -cd dolphinscheduler-python/pydolphinscheduler -pip install -e . +# Check installation: it succeeded if you see the version output (here we use 0.1.0 as an example) +$ python -c "import pydolphinscheduler; print(pydolphinscheduler.__version__)" +0.1.0 ``` +Here we show you how to install and run a simple example of pydolphinscheduler + ### Start Server And Run Example Before you run an example, you have to start backend server. You could follow [development setup][dev-setup] @@ -54,9 +51,12 @@ http://localhost:12345/dolphinscheduler After backend server is being start, all requests from `pydolphinscheduler` would be sent to backend server. And for now we could run a simple example by: + + ```shell -cd dolphinscheduler-python/pydolphinscheduler -python example/tutorial.py +# Please make sure your terminal can access the URL below +curl https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py -o ./tutorial.py +python ./tutorial.py ``` > **_NOTICE:_** Since Apache DolphinScheduler's tenant is requests while running command, you might need to change @@ -65,105 +65,24 @@ python example/tutorial.py After command execute, you could see a new project with single process definition named *tutorial* in the [UI][ui-project]. -Until now, we finish quick start by an example of pydolphinscheduler and run it.
If you want to inspect or join -pydolphinscheduler develop, you could take a look at [develop](#develop) - ## Develop -pydolphinscheduler is python API for Apache DolphinScheduler, it just defines what workflow look like instead of -store or execute it. We here use [py4j][py4j] to dynamically access Java Virtual Machine. - -### Setup Develop Environment - -We already clone the code in [quick start](#quick-start), so next step we have to open pydolphinscheduler project -in you editor. We recommend you use [pycharm][pycharm] instead of [IntelliJ IDEA][idea] to open it. And you could -just open directory `dolphinscheduler-python/pydolphinscheduler` instead of `dolphinscheduler-python`. - -Then you should add developer dependence to make sure you could run test and check code style locally - -```shell -pip install -r requirements_dev.txt -``` - -### Brief Concept - -Apache DolphinScheduler is design to define workflow by UI, and pydolphinscheduler try to define it by code. When -define by code, user usually do not care user, tenant, or queue exists or not. All user care about is created -a new workflow by the code his/her definition. So we have some **side object** in `pydolphinscheduler/side` -directory, their only check object exists or not, and create them if not exists. - -#### Process Definition - -pydolphinscheduler workflow object name, process definition is also same name as Java object(maybe would be change to -other word for more simple). - -#### Tasks - -pydolphinscheduler tasks object, we use tasks to define exact job we want DolphinScheduler do for us. For now, -we only support `shell` task to execute shell task. [This link][all-task] list all tasks support in DolphinScheduler -and would be implemented in the further. - -### Code Style - -We use [isort][isort] to automatically keep Python imports alphabetically, and use [Black][black] for code -formatter and [Flake8][flake8] for pep8 checker. 
If you use [pycharm][pycharm]or [IntelliJ IDEA][idea], -maybe you could follow [Black-integration][black-editor] to configure them in your environment. - -Our Python API CI would automatically run code style checker and unittest when you submit pull request in -GitHub, you could also run static check locally. - -```shell -# We recommend you run isort and Black before Flake8, because Black could auto fix some code style issue -# but Flake8 just hint when code style not match pep8 - -# Run Isort -isort . - -# Run Black -black . - -# Run Flake8 -flake8 -``` - -### Testing +Until now, we finish quick start by an example of pydolphinscheduler and run it. If you want to inspect or join +pydolphinscheduler develop, you could take a look at [develop](./DEVELOP.md) -pydolphinscheduler using [pytest][pytest] to test our codebase. GitHub Action will run our test when you create -pull request or commit to dev branch, with python version `3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. +## Release -To test locally, you could directly run pytest after set `PYTHONPATH` +If you are interested in how to release **PyDolphinScheduler**, you could go and see at [release](./RELEASE.md) -```shell -PYTHONPATH=src/ pytest -``` - -We try to keep pydolphinscheduler usable through unit test coverage. 90% test coverage is our target, but for -now, we require test coverage up to 85%, and each pull request leas than 85% would fail our CI step -`Tests coverage`. We use [coverage][coverage] to check our test coverage, and you could check it locally by -run command. - -```shell -coverage run && coverage report -``` +## What's more -It would not only run unit test but also show each file coverage which cover rate less than 100%, and `TOTAL` -line show you total coverage of you code. If your CI failed with coverage you could go and find some reason by -this command output. 
+For more detailed information, please see the **PyDolphinScheduler** [document][pyds-doc-home] [pypi]: https://pypi.org/ [dev-setup]: https://dolphinscheduler.apache.org/en-us/development/development-environment-setup.html [ui-project]: http://8.142.34.29:12345/dolphinscheduler/ui/#/projects/list -[py4j]: https://www.py4j.org/index.html -[pycharm]: https://www.jetbrains.com/pycharm -[idea]: https://www.jetbrains.com/idea/ -[all-task]: https://dolphinscheduler.apache.org/en-us/docs/dev/user_doc/guide/task/shell.html -[pytest]: https://docs.pytest.org/en/latest/ -[black]: https://black.readthedocs.io/en/stable/index.html -[flake8]: https://flake8.pycqa.org/en/latest/index.html -[black-editor]: https://black.readthedocs.io/en/stable/integrations/editors.html#pycharm-intellij-idea -[coverage]: https://coverage.readthedocs.io/en/stable/ -[isort]: https://pycqa.github.io/isort/index.html +[pyds-doc-home]: https://dolphinscheduler.apache.org/python/index.html [ga-py-test]: https://github.com/apache/dolphinscheduler/actions/workflows/py-ci.yml/badge.svg?branch=dev [ga]: https://github.com/apache/dolphinscheduler/actions diff --git a/dolphinscheduler-python/pydolphinscheduler/RELEASE.md b/dolphinscheduler-python/pydolphinscheduler/RELEASE.md new file mode 100644 index 0000000000..6c2b46eee6 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/RELEASE.md @@ -0,0 +1,35 @@ + + +# Release + +**PyDolphinScheduler** official release is in the [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/), +and it should be released together with [apache-dolphinscheduler](https://github.com/apache/dolphinscheduler). + +## To ASF Distribution Directory + +You could release to the [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/) according to the +[release guide](https://dolphinscheduler.apache.org/en-us/community/release-prepare.html) on the DolphinScheduler +website.
+ +## To PyPI + +[PyPI](https://pypi.org), the Python Package Index, is a repository of software for the Python programming language. +Users can install Python packages from it. Releasing to PyPI makes it easier for users to install and try PyDolphinScheduler. +There is an official way to package a project from [PyPA](https://packaging.python.org/en/latest/tutorials/packaging-projects) diff --git a/dolphinscheduler-python/pydolphinscheduler/ROADMAP.md b/dolphinscheduler-python/pydolphinscheduler/ROADMAP.md deleted file mode 100644 index 32ad5e2b39..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/ROADMAP.md +++ /dev/null @@ -1,34 +0,0 @@ - - -## Roadmap - -### v0.0.3 - -Add other features, tasks, parameters in DS, keep code coverage up to 90% - -### v0.0.2 - -Add docs about how to use and develop package, code coverage up to 90%, add CI/CD -for package - -### v0.0.1(current) - -Setup up POC, for defining DAG with python code, running DAG manually, -releasing to pypi \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/requirements.txt b/dolphinscheduler-python/pydolphinscheduler/requirements.txt deleted file mode 100644 index cdec3cabb2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/requirements.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied.
See the License for the -# specific language governing permissions and limitations -# under the License. - -py4j~=0.10.9.2 \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt b/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt deleted file mode 100644 index 58f6102deb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# testting -pytest~=6.2.5 -freezegun -# Test coverage -coverage -# code linting and formatting -flake8 -flake8-docstrings -flake8-black -isort -# Document -sphinx -sphinx_rtd_theme diff --git a/dolphinscheduler-python/pydolphinscheduler/setup.py b/dolphinscheduler-python/pydolphinscheduler/setup.py index 4a6c045458..cd6eb3eb77 100644 --- a/dolphinscheduler-python/pydolphinscheduler/setup.py +++ b/dolphinscheduler-python/pydolphinscheduler/setup.py @@ -22,13 +22,41 @@ from os.path import dirname, join from setuptools import find_packages, setup -version = "0.0.1.dev0" - if sys.version_info[0] < 3: raise Exception( "pydolphinscheduler does not support Python 2. Please upgrade to Python 3." 
) +version = "0.1.0" + +# Start package required +prod = [ + "py4j~=0.10", +] + +doc = [ + "sphinx>=4.3", + "sphinx_rtd_theme>=1.0", +] + +test = [ + "pytest>=6.2", + "freezegun>=1.1", + "coverage>=6.1", +] + +style = [ + "flake8>=4.0", + "flake8-docstrings>=1.6", + "flake8-black>=0.2", + "isort>=5.10", +] + +dev = style + test + doc + +all_dep = prod + dev +# End package required + def read(*names, **kwargs): """Read file content from given file path.""" @@ -38,10 +66,10 @@ def read(*names, **kwargs): setup( - name="pydolphinscheduler", + name="apache-dolphinscheduler", version=version, license="Apache License 2.0", - description="Apache DolphinScheduler python SDK", + description="Apache DolphinScheduler Python API", long_description=read("README.md"), # Make sure pypi is expecting markdown long_description_content_type="text/markdown", @@ -57,8 +85,8 @@ setup( ], project_urls={ "Homepage": "https://dolphinscheduler.apache.org", - "Documentation": "https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/quick-start.html", - "Source": "https://github.com/apache/dolphinscheduler", + "Documentation": "https://dolphinscheduler.apache.org/python/index.html", + "Source": "https://github.com/apache/dolphinscheduler/dolphinscheduler-python/pydolphinscheduler", "Issue Tracker": "https://github.com/apache/dolphinscheduler/issues", "Discussion": "https://github.com/apache/dolphinscheduler/discussions", "Twitter": "https://twitter.com/dolphinschedule", @@ -66,9 +94,13 @@ setup( packages=find_packages(where="src"), package_dir={"": "src"}, include_package_data=True, + package_data={ + "examples": ["examples.tutorial.py"], + }, + platforms=["any"], classifiers=[ # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - "Development Status :: 1 - Planning", + "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", @@ -85,10 +117,12 @@ setup( 
"Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: User Interfaces", ], - install_requires=[ - # Core - "py4j~=0.10", - # Dev - "pytest~=6.2", - ], + install_requires=prod, + extras_require={ + "all": all_dep, + "dev": dev, + "style": style, + "test": test, + "doc": doc, + }, ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py index 701b4cccf1..2a7b55430c 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py @@ -16,3 +16,7 @@ # under the License. """Init root of pydolphinscheduler.""" + +from pkg_resources import get_distribution + +__version__ = get_distribution("apache-dolphinscheduler").version