Skip to content

Commit 90f5d5b

Browse files
committed
Dbt producer compatibility test
Signed-off-by: Pawel Marut <pawel.marut@xebia.com>
1 parent af18d57 commit 90f5d5b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3730
-2885
lines changed

.github/actions/run_event_validation/action.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,22 @@ runs:
5555
run: |
5656
cd tmp
5757
IFS=',' read -ra TAGS <<< "${{ inputs.release_tags }}"
58+
git fetch --tags --quiet
5859
for TAG in "${TAGS[@]}"; do
5960
echo "Checking out tag: $TAG"
60-
git fetch --tags --quiet
6161
if git checkout --quiet "$TAG"; then
6262
DEST_DIR="../specs/$TAG"
63-
if [ -d "spec" ]; then
64-
mkdir -p "../specs/$TAG"
65-
find spec -path './website' -prune -o -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp {} "../specs/$TAG/" \;
63+
if [[ -d "spec" || -d "integration/common/src/openlineage" ]]; then
64+
mkdir -p "$DEST_DIR"
65+
if [ -d "spec" ]; then
66+
find spec -path './website' -prune -o -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp {} "$DEST_DIR" \;
67+
fi
68+
if [ -d "integration/common/src/openlineage" ]; then
69+
find integration/common/src/openlineage -type f -iname '*facet.json' -exec cp {} "$DEST_DIR" \;
70+
fi
6671
echo "success"
6772
else
68-
echo "Spec directory not found in $TAG"
73+
echo "Neither spec nor integration/common/src/openlineage directory found in $TAG"
6974
fi
7075
else
7176
echo "Tag $TAG not found!"

.github/workflows/main_new_release.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ on:
1717
description: 'Run Hive Dataproc tests'
1818
required: false
1919
default: 'true'
20+
run_dbt:
21+
description: 'Run DBT tests'
22+
required: false
23+
default: 'true'
2024
openlineage_release:
2125
description: 'Override OpenLineage release version'
2226
required: false
@@ -26,6 +30,9 @@ on:
2630
hive_matrix:
2731
description: 'Overwrite matrix for hive tests'
2832
required: false
33+
dbt_matrix:
34+
description: 'Overwrite matrix for dbt tests'
35+
required: false
2936

3037
permissions:
3138
id-token: write
@@ -40,9 +47,11 @@ jobs:
4047
run_dataplex: ${{ github.event.inputs.run_dataplex || 'true' }}
4148
run_spark_dataproc: ${{ github.event.inputs.run_spark_dataproc || 'true' }}
4249
run_hive_dataproc: ${{ github.event.inputs.run_hive_dataproc || 'true' }}
50+
run_dbt: ${{ github.event.inputs.run_dbt || 'true' }}
4351
openlineage_release: ${{ github.event.inputs.openlineage_release || steps.select-components.outputs.ol_release }}
4452
spark_matrix: ${{ github.event.inputs.spark_matrix || steps.set-matrix-values.outputs.spark_dataproc_matrix }}
4553
hive_matrix: ${{ github.event.inputs.hive_matrix || steps.set-matrix-values.outputs.hive_dataproc_matrix }}
54+
dbt_matrix: ${{ github.event.inputs.dbt_matrix || steps.set-matrix-values.outputs.dbt_matrix }}
4655
execution_time: ${{ steps.get-execution-time.outputs.execution_time }}
4756
steps:
4857
- name: Get execution time
@@ -90,6 +99,7 @@ jobs:
9099
91100
echo "spark_dataproc_matrix=$(get_matrix spark_dataproc)" >> $GITHUB_OUTPUT
92101
echo "hive_dataproc_matrix=$(get_matrix hive_dataproc)" >> $GITHUB_OUTPUT
102+
echo "dbt_matrix=$(get_matrix dbt)" >> $GITHUB_OUTPUT
93103
94104
######## COMPONENT VALIDATION ########
95105

@@ -130,6 +140,17 @@ jobs:
130140
component_release: ${{ matrix.component_version }}
131141
get-latest-snapshots: 'false'
132142

143+
dbt:
144+
needs: initialize_workflow
145+
if: ${{ needs.initialize_workflow.outputs.run_dbt == 'true' }}
146+
uses: ./.github/workflows/producer_dbt.yml
147+
strategy:
148+
matrix: ${{ fromJson(needs.initialize_workflow.outputs.dbt_matrix) }}
149+
with:
150+
ol_release: ${{ matrix.openlineage_versions }}
151+
dbt_release: ${{ matrix.component_version }}
152+
get-latest-snapshots: 'false'
153+
133154
######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########
134155

135156
collect-and-compare-reports:
@@ -138,6 +159,7 @@ jobs:
138159
- dataplex
139160
- spark-dataproc
140161
- hive-dataproc
162+
- dbt
141163
if: ${{ !failure() }}
142164
uses: ./.github/workflows/collect_and_compare_reports.yml
143165

.github/workflows/main_ol_spec_changes.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ on:
1919
hive_matrix:
2020
description: 'Overwrite matrix for hive tests'
2121
required: false
22+
dbt_matrix:
23+
description: 'Overwrite matrix for dbt tests'
24+
required: false
2225

2326

2427
permissions:
@@ -35,6 +38,7 @@ jobs:
3538
ol_release: ${{ github.event.inputs.openlineage_release || steps.get-release.outputs.openlineage_release }}
3639
spark_matrix: ${{ github.event.inputs.spark_matrix || steps.set-matrix-values.outputs.spark_dataproc_matrix }}
3740
hive_matrix: ${{ github.event.inputs.hive_matrix || steps.set-matrix-values.outputs.hive_dataproc_matrix }}
41+
dbt_matrix: ${{ github.event.inputs.dbt_matrix || steps.set-matrix-values.outputs.dbt_matrix }}
3842
execution_time: ${{ steps.get-execution-time.outputs.execution_time }}
3943
steps:
4044
- name: Get execution time
@@ -108,6 +112,7 @@ jobs:
108112
109113
echo "spark_dataproc_matrix=$(get_matrix spark_dataproc)" >> $GITHUB_OUTPUT
110114
echo "hive_dataproc_matrix=$(get_matrix hive_dataproc)" >> $GITHUB_OUTPUT
115+
echo "dbt_matrix=$(get_matrix dbt)" >> $GITHUB_OUTPUT
111116
112117
113118
######## COMPONENT VALIDATION ########
@@ -154,6 +159,18 @@ jobs:
154159
component_release: ${{ matrix.component_version }}
155160
get-latest-snapshots: 'true'
156161

162+
dbt:
163+
needs:
164+
- initialize_workflow
165+
if: ${{ success() && needs.initialize_workflow.outputs.changes_in_spec == 'true' }}
166+
uses: ./.github/workflows/producer_dbt.yml
167+
strategy:
168+
matrix: ${{ fromJson(needs.initialize_workflow.outputs.dbt_matrix) }}
169+
with:
170+
ol_release: ${{ matrix.openlineage_versions }}
171+
dbt_release: ${{ matrix.component_version }}
172+
get-latest-snapshots: 'false'
173+
157174
######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########
158175

159176
collect-and-compare-reports:
@@ -162,6 +179,7 @@ jobs:
162179
- scenarios_check
163180
- spark-dataproc
164181
- hive-dataproc
182+
- dbt
165183
uses: ./.github/workflows/collect_and_compare_reports.yml
166184
with:
167185
fail-for-new-failures: true

.github/workflows/main_pr.yml

Lines changed: 9 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,6 @@ name: Pull Request trigger
22

33
on:
44
pull_request:
5-
workflow_dispatch:
6-
inputs:
7-
components:
8-
description: 'Components to test (comma-separated: dbt, spark_dataproc, hive_dataproc, dataplex, scenarios, or "all")'
9-
required: false
10-
default: 'all'
11-
type: string
125

136

147
permissions:
@@ -56,46 +49,19 @@ jobs:
5649
fi
5750
}
5851
59-
check_component() {
60-
local component=$1
61-
local output=$2
62-
if [[ "$COMPONENTS" == "all" ]] || echo "$COMPONENTS" | grep -qw "$component"; then
63-
echo "$output=true" >> $GITHUB_OUTPUT
64-
echo "true"
65-
fi
66-
}
67-
68-
# Handle workflow_dispatch (manual trigger)
69-
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
70-
COMPONENTS="${{ github.event.inputs.components }}"
71-
echo "Manual trigger - testing components: $COMPONENTS"
52+
CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
53+
if [[ -n "$CHANGED_FILES" ]]; then
54+
echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
7255
73-
scenarios=$(check_component "scenarios" "scenarios_changed")
74-
dataplex=$(check_component "dataplex" "dataplex_changed")
75-
spark_dataproc=$(check_component "spark_dataproc" "spark_dataproc_changed")
76-
hive_dataproc=$(check_component "hive_dataproc" "hive_dataproc_changed")
77-
dbt=$(check_component "dbt" "dbt_changed")
56+
scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
57+
dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
58+
spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
59+
hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")
60+
dbt=$(check_path "producer/dbt/" "dbt_changed")
7861
7962
if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc || $dbt ]]; then
8063
echo "any_changed=true" >> $GITHUB_OUTPUT
8164
fi
82-
83-
# Handle pull_request (PR trigger)
84-
else
85-
CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
86-
if [[ -n "$CHANGED_FILES" ]]; then
87-
echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
88-
89-
scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
90-
dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
91-
spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
92-
hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")
93-
dbt=$(check_path "producer/dbt/" "dbt_changed")
94-
95-
if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc || $dbt ]]; then
96-
echo "any_changed=true" >> $GITHUB_OUTPUT
97-
fi
98-
fi
9965
fi
10066
env:
10167
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -207,10 +173,7 @@ jobs:
207173
if: ${{ !failure() && needs.initialize_workflow.outputs.any_run == 'true'}}
208174
uses: ./.github/workflows/collect_and_compare_reports.yml
209175
with:
210-
# Temporarily disabled for dbt producer feature branch testing
211-
# New dbt results are expected failures compared to main branch baseline
212-
# TODO: Re-enable after merge to main or accept dbt custom facet warnings
213-
fail-for-new-failures: false
176+
fail-for-new-failures: true
214177

215178
generate-compatibility-tables:
216179
needs:

.github/workflows/producer_dbt.yml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ jobs:
7474
pip install dbt-core==${{ inputs.dbt_release }}
7575
pip install dbt-postgres
7676
pip install openlineage-dbt==${{ inputs.ol_release }}
77-
pip install -r producer/dbt/test_runner/requirements.txt
7877
7978
- name: Set producer output event dir
8079
if: ${{ steps.init.outputs.scenarios }}
@@ -93,20 +92,22 @@ jobs:
9392
for scenario in "${scenarios[@]}"
9493
do
9594
echo "Running dbt scenario: $scenario"
96-
97-
if ! python3 producer/dbt/test_runner/cli.py run-scenario \
98-
--scenario "$scenario" \
99-
--output-dir "${{ steps.set-producer-output.outputs.event_dir }}"
100-
then
101-
echo "Error: dbt scenario failed: $scenario"
102-
exit 1
103-
fi
95+
96+
mkdir -p "${{ steps.set-producer-output.outputs.event_dir }}/$scenario"
97+
bash producer/dbt/scenarios/$scenario/test/run.sh "${{ steps.set-producer-output.outputs.event_dir }}/$scenario"
10498
10599
echo "Finished running scenario: $scenario"
106100
done
107101
108102
echo "Finished running all scenarios"
109103
104+
- uses: actions/upload-artifact@v4
105+
if: ${{ steps.init.outputs.scenarios }}
106+
with:
107+
name: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-events
108+
path: ${{ steps.set-producer-output.outputs.event_dir }}
109+
retention-days: 1
110+
110111
- name: Validation
111112
if: ${{ steps.init.outputs.scenarios }}
112113
uses: ./.github/actions/run_event_validation

.gitignore

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -170,25 +170,7 @@ ignored/
170170
bin/
171171

172172
# OpenLineage event files generated during local testing
173-
openlineage_events.json
174-
openlineage_events.jsonl
175-
*/openlineage_events.json
176-
*/openlineage_events.jsonl
177-
**/events/openlineage_events.json
178-
**/events/openlineage_events.jsonl
179-
180-
# Test output files (keep directory structure, ignore contents)
181-
producer/dbt/test_output/*
182-
!producer/dbt/test_output/.gitkeep
183-
184-
# Auto-generated report files (generated by CI/CD)
185-
*_producer_report.json
186-
*_consumer_report.json
187-
generated-files/report.json
188-
189-
# Virtual environments
190-
venv/
191-
test_venv/
192-
*/venv/
193-
*/test_venv/
194-
**/test_venv/
173+
**/specs/
174+
**/output/
175+
**/test/openlineage.yml
176+
dbt_producer_report.json

0 commit comments

Comments
 (0)