Skip to content

Commit 4ff008b

Browse files
authored
Merge pull request #85 from godatadriven/84-make-it-easier-to-install-and-use-whirl
84 make it easier to install and use whirl
2 parents b8ad478 + 2315b38 commit 4ff008b

File tree

18 files changed

+159
-34
lines changed

18 files changed

+159
-34
lines changed

.gitattributes

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
11
* text=auto
22
*.sh text eol=lf
33
*.bat text eol=crlf
4-
*.cmd text eol=crlf
4+
*.cmd text eol=crlf
5+
6+
.gitattributes export-ignore
7+
.gitignore export-ignore
8+
.github export-ignore
9+
.gitkeep export-ignore
10+
.editorconfig export-ignore
11+
logo export-ignore
12+
13+
# Unfinished examples and their envs
14+
examples/dbt** export-ignore
15+
envs/dbt** export-ignore

.github/workflows/whirl-ci.yml

+52-13
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,20 @@ jobs:
2424
steps:
2525
- uses: actions/checkout@v2
2626
- id: setdirs # Give the step an id so its outputs can be referenced in the job's outputs key above
27-
run: echo "::set-output name=dir::$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')"
28-
# Define step output named dir base on ls command transformed to JSON thanks to jq
27+
# run: echo "::set-output name=dir::$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')"
28+
run: echo "dir=$(ls -d ./examples/* | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
29+
# Define step output named dir based on ls command transformed to JSON thanks to jq
30+
31+
examples: # Job that lists the subdirectories of ./examples
32+
runs-on: ubuntu-latest
33+
outputs:
34+
# Expose the job output named example, taken from the output of the inner step
35+
example: ${{ steps.setexamples.outputs.example }}
36+
steps:
37+
- uses: actions/checkout@v2
38+
- id: setexamples # Give the step an id so its outputs can be referenced in the job's outputs key above
39+
run: echo "example=$(ls -d ./examples/* | sed -r 's/\.\/examples\/(.*)/\1/g' | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
40+
# Define step output named example based on ls command transformed to JSON thanks to jq
2941

3042
whirl-ci-default-envs:
3143
needs: [directories]
@@ -34,27 +46,54 @@ jobs:
3446
fail-fast: false
3547
max-parallel: 4
3648
matrix:
37-
example: ${{ fromJson(needs.directories.outputs.dir) }}
49+
example_dir: ${{ fromJson(needs.directories.outputs.dir) }}
3850
python_version: ["3.8", "3.9"]
39-
airflow_version: ["2.2.5", "2.3.2"]
51+
airflow_version: ["2.2.5", "2.5.0"]
4052
exclude:
4153
# Needs more memory than available on the runner
42-
- example: ./examples/dbt-spark-example
43-
- example: ./examples/spark-delta-sharing
44-
- example: ./examples/spark-s3-to-hive
54+
- example_dir: ./examples/dbt-spark-example
55+
- example_dir: ./examples/spark-delta-sharing
56+
- example_dir: ./examples/spark-s3-to-hive
4557
# Exclude failing dbt runs
46-
- example: ./examples/dbt-example
58+
- example_dir: ./examples/dbt-example
4759
env:
4860
PYTHON_VERSION: ${{ matrix.python_version }}
4961
AIRFLOW_VERSION: ${{ matrix.airflow_version }}
5062
steps:
5163
- uses: actions/checkout@v2
52-
- name: Run whirl CI ${{ matrix.example }}
53-
working-directory: ${{ matrix.example }}
64+
- name: Run whirl CI ${{ matrix.example_dir }}
65+
working-directory: ${{ matrix.example_dir }}
5466
run: |
55-
echo Run Ci for example ${{ matrix.example }}
67+
echo Run Ci from example directory ${{ matrix.example_dir }}
5668
../../whirl ci
5769
70+
whirl-ci-default-envs-from-root-dir:
71+
needs: [examples]
72+
runs-on: ubuntu-latest
73+
strategy:
74+
fail-fast: false
75+
max-parallel: 4
76+
matrix:
77+
example: ${{ fromJson(needs.examples.outputs.example) }}
78+
python_version: ["3.8", "3.9"]
79+
airflow_version: ["2.2.5", "2.5.0"]
80+
exclude:
81+
# Needs more memory than available on the runner
82+
- example: dbt-spark-example
83+
- example: spark-delta-sharing
84+
- example: spark-s3-to-hive
85+
# Exclude failing dbt runs
86+
- example: dbt-example
87+
env:
88+
PYTHON_VERSION: ${{ matrix.python_version }}
89+
AIRFLOW_VERSION: ${{ matrix.airflow_version }}
90+
steps:
91+
- uses: actions/checkout@v2
92+
- name: Run whirl CI example ${{ matrix.example }}
93+
run: |
94+
echo Run Ci for example ${{ matrix.example }}
95+
./whirl -x ${{ matrix.example }} ci
96+
5897
5998
whirl-ci-extra-env-spark-s3-to-postgres:
6099
runs-on: ubuntu-latest
@@ -72,7 +111,7 @@ jobs:
72111
max-parallel: 4
73112
matrix:
74113
python_version: ["3.8", "3.9"]
75-
airflow_version: ["2.2.5", "2.3.2"]
114+
airflow_version: ["2.2.5", "2.5.0"]
76115
runs-on: ubuntu-latest
77116
env:
78117
PYTHON_VERSION: ${{ matrix.python_version }}
@@ -91,7 +130,7 @@ jobs:
91130
max-parallel: 4
92131
matrix:
93132
python_version: ["3.8", "3.9"]
94-
airflow_version: ["2.2.5", "2.3.2"]
133+
airflow_version: ["2.2.5", "2.5.0"]
95134
runs-on: ubuntu-latest
96135
env:
97136
PYTHON_VERSION: ${{ matrix.python_version }}

.github/workflows/whirl-release.yml

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: Publish minimal release archive
2+
on:
3+
release:
4+
types: [published]
5+
6+
jobs:
7+
deploy:
8+
runs-on: ubuntu-latest
9+
permissions:
10+
contents: write # for upload release asset
11+
steps:
12+
- uses: actions/checkout@v2
13+
14+
- name: Run git-archive command to create a release artifact
15+
run: git archive --format=tar.gz --prefix=whirl/ --output=whirl-release.tar.gz HEAD
16+
17+
- name: upload Whirl release artifact
18+
env:
19+
GITHUB_TOKEN: ${{ github.token }}
20+
run: |
21+
gh release upload ${{ github.event.release.tag_name }} whirl-release.tar.gz --clobber

README.md

+32-6
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ NOTE: _whirl_ is not intended to replace proper (unit) testing of the logic you
1515

1616
_whirl_ relies on [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/). Make sure you have both installed. If using _Docker for Mac_ or _Windows_, ensure that you have configured it with sufficient RAM (8GB or more recommended) for running all your containers.
1717

18-
When you want to use _whirl_ in your CI pipeline (currently work in progress), you need to have `jq` installed. For example, with Homebrew:
18+
When you want to use _whirl_ in your CI pipeline, you need to have `jq` installed. For example, with Homebrew:
1919

2020
```bash
2121
brew install jq
@@ -28,6 +28,8 @@ As of January 2021, Whirl uses Airflow 2.x.x as the default version. A specific
2828

2929
## Getting Started
3030

31+
### Development
32+
3133
Clone this repository:
3234

3335
```
@@ -38,6 +40,21 @@ For ease of use you can add the base directory to your `PATH` environment variab
3840
export PATH=<target directory of whirl>:${PATH}
3941
```
4042

43+
### Use the release
44+
45+
Download the [latest Whirl release artifact](https://github.com/godatadriven/whirl/releases/latest/download/whirl-release.tar.gz)
46+
47+
Extract the file (for example into `/usr/local/opt`)
48+
49+
```bash
50+
tar -xvzf whirl-release.tar.gz -C /usr/local/opt
51+
```
52+
53+
Make sure the whirl script is available on your `PATH`:
54+
```bash
55+
export PATH=/usr/local/opt/whirl:$PATH
56+
```
57+
4158
## Usage
4259

4360
The `whirl` script is used to perform all actions.
@@ -51,18 +68,22 @@ $ whirl --help
5168

5269
#### Starting whirl
5370

54-
The default action is to start the DAG in your current directory. It expects an environment to be configured. You can pass this as a command line argument or you can configure it in a `.whirl.env` file. (See [Configuring environment variables](#configuring-environment-variables).) The environment refers to a directory with the same name in the `envs` directory located near the _whirl_ script.
71+
The default action is to start the DAG in your current directory.
72+
73+
With the `[-x example]` command-line argument you can run whirl from anywhere and tell whirl which example DAG to run. The example refers to a directory with the same name in the `examples` directory located near the _whirl_ script.
74+
75+
Whirl expects an environment to be configured. You can pass it with the command-line argument `[-e environment]`, or you can configure it as the environment variable `WHIRL_ENVIRONMENT` in a `.whirl.env` file. (See [Configuring environment variables](#configuring-environment-variables).) The environment refers to a directory with the same name in the `envs` directory located near the _whirl_ script.
5576

5677
```bash
57-
$ whirl [start] [-d <directory>] [-e <environment>]
78+
$ whirl [-x example] [-e <environment>] [start]
5879
```
5980

6081
Specifying the `start` command line argument is a more explicit way to start _whirl_.
6182

6283
#### Stopping whirl
6384

6485
```bash
65-
$ whirl stop [-d <directory>] [-e <environment>]
86+
$ whirl [-x example] [-e <environment>] stop
6687
```
6788
Stops the configured environment.
6889

@@ -134,8 +155,6 @@ Each example contains it's own README file to explain the specifics of that exam
134155

135156
#### Generic running of examples
136157

137-
From within the example directory the `whirl` command can be executed.
138-
139158
To run an example:
140159

141160
```bash
@@ -144,6 +163,13 @@ $ cd ./examples/<example-dag-directory>
144163
$ whirl -e <environment to use>
145164
```
146165

166+
or
167+
```bash
168+
$
169+
# Note: here we pass the whirl environment as a command-line argument. It can also be configured with the WHIRL_ENVIRONMENT variable
170+
$ whirl -x <example to run> -e <environment to use>
171+
```
172+
147173
Open your browser to [http://localhost:5000](http://localhost:5000) to access the Airflow UI. Manually enable the DAG and watch the pipeline run to successful completion.
148174

149175

envs/dbt-example/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
2222
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
2323
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
2424
AIRFLOW__CORE__LOAD_EXAMPLES=False
25+
26+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/postgres-s3-external-spark/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
2020
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
2121
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
2222
AIRFLOW__CORE__LOAD_EXAMPLES=False
23+
24+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/postgres-s3-spark/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
2020
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
2121
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
2222
AIRFLOW__CORE__LOAD_EXAMPLES=False
23+
24+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/s3-external-spark-hive/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
1515
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
1616
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
1717
AIRFLOW__CORE__LOAD_EXAMPLES=False
18+
19+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/s3-spark-delta-sharing-minio/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
1414
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
1515
AIRFLOW__CORE__LOAD_EXAMPLES=False
1616
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
17+
18+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/s3-spark-delta-sharing-riverbank/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
2121
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
2222
AIRFLOW__CORE__LOAD_EXAMPLES=False
2323
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
24+
25+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/s3-spark-delta-sharing/.whirl.env

+2
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ AIRFLOW__CORE__EXPOSE_CONFIG=True
1414
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
1515
AIRFLOW__CORE__LOAD_EXAMPLES=False
1616
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
17+
18+
MINIMAL_AIRFLOW_VERSION=2.3.0

envs/sftp-mysql-example/.whirl.env

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
MOCK_DATA_FOLDER=$(pwd)/mock-data
1+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
22

33

44
# Airflow variables

examples/dbt-example/.whirl.env

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
WHIRL_ENVIRONMENT=dbt-example
2-
MOCK_DATA_FOLDER=$(pwd)/mock-data
2+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data

examples/dbt-spark-example/.whirl.env

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
WHIRL_ENVIRONMENT=s3-external-spark-hive
2-
MOCK_DATA_FOLDER=$(pwd)/mock-data
2+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
WHIRL_ENVIRONMENT=s3-spark-delta-sharing
2-
MOCK_DATA_FOLDER=$(pwd)/mock-data
2+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
33

examples/spark-s3-to-hive/.whirl.env

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
WHIRL_ENVIRONMENT=s3-external-spark-hive
2-
MOCK_DATA_FOLDER=$(pwd)/mock-data
3-
2+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
WHIRL_ENVIRONMENT=postgres-s3-external-spark
2-
MOCK_DATA_FOLDER=$(pwd)/mock-data
2+
MOCK_DATA_FOLDER=${DAG_FOLDER}/mock-data
3+
MINIMAL_AIRFLOW_VERSION=2.3.0

whirl

+21-7
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@ function export_environment_vars() {
2121
# shellcheck disable=SC2034
2222
DOCKER_CONTEXT_FOLDER=${SCRIPT_DIR}/docker
2323
# shellcheck disable=SC2034
24-
DAG_FOLDER=$(pwd)
25-
# shellcheck disable=SC2034
26-
PROJECTNAME=$(basename "${DAG_FOLDER}")
27-
# shellcheck disable=SC2034
2824
WHIRL_INITIATOR=$(whoami)
2925
# shellcheck disable=SC2034
3026
WHIRL_SETUP_FOLDER=/etc/airflow/whirl.setup.d
@@ -41,6 +37,17 @@ function export_environment_vars() {
4137
. "${SCRIPT_DIR}/.whirl.env"
4238
fi
4339

40+
# determine whether to use the example set at the commandline or
41+
# in the current folder
42+
if [ -z "${WHIRL_EXAMPLE_ARG}" ]; then
43+
# shellcheck disable=SC2034
44+
DAG_FOLDER=$(pwd)
45+
else
46+
DAG_FOLDER=${SCRIPT_DIR}/examples/${WHIRL_EXAMPLE_ARG}
47+
fi
48+
# shellcheck disable=SC2034
49+
PROJECTNAME=$(basename "${DAG_FOLDER}")
50+
4451
# determine whether to use the environment set at the commandline or
4552
# in the DAG FOLDER .whirl.env
4653
if [ -z "${WHIRL_ENVIRONMENT_ARG}" ]; then
@@ -113,7 +120,7 @@ function export_environment_vars() {
113120
}
114121

115122
detect_potential_dag() {
116-
test "$(find . -type f -name '*.py' -o -name '*.zip' | wc -l)" -gt 0
123+
test "$(find "${DAG_FOLDER}" -type f -name '*.py' -maxdepth 1 -o -name '*.zip' | wc -l)" -gt 0
117124
}
118125

119126
check_next_dagrun_scheduled_today() {
@@ -340,6 +347,7 @@ logs() {
340347
usage() {
341348
echo "usage: ${BASH_SOURCE[0]} [-h|--help] [-e|--environment env] [start|stop|ci]"
342349
echo " -h|--help display usage"
350+
echo " -x|--example example specify example to run"
343351
echo " -e|--environment environment specify environment to use"
344352
echo " -d|--directory environment_folder specify the folder that contains the environments (defaults to SCRIPT_DIR)"
345353
echo " -l|--logs servicename tail the logs of the service"
@@ -354,6 +362,11 @@ function read_arguments() {
354362
do
355363
key="${1}"
356364
case ${key} in
365+
-x|--example)
366+
WHIRL_EXAMPLE_ARG="${2}"
367+
shift # past argument
368+
shift # past value
369+
;;
357370
-e|--environment)
358371
WHIRL_ENVIRONMENT_ARG="${2}"
359372
shift # past argument
@@ -401,8 +414,8 @@ function read_arguments() {
401414
function main() {
402415
read_arguments "$@"
403416

417+
export_environment_vars
404418
if detect_potential_dag; then
405-
export_environment_vars
406419

407420
if [ -z "${LOGS}" ]; then
408421
if [ -z "${STOP}" ]; then
@@ -414,7 +427,8 @@ function main() {
414427
logs
415428
fi
416429
else
417-
echo "No .py or .zip files found that may contain an Apache Airflow DAG"
430+
echo "No .py or .zip files found in ${DAG_FOLDER} that may contain an Apache Airflow DAG"
431+
echo "did you correctly specify the example directory?"
418432
fi
419433
}
420434

0 commit comments

Comments
 (0)