diff --git a/.github/workflows/check-duplicates.yml b/.github/workflows/check-duplicates.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/check-duplicates.yml
@@ -0,0 +1,35 @@
+# Runs bin/check-duplicates.sh when documents or the check itself change.
+name: Check Duplicates
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'documents/**'
+      - 'bin/check-duplicates.sh'
+      - '.github/workflows/check-duplicates.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'documents/**'
+      - 'bin/check-duplicates.sh'
+      - '.github/workflows/check-duplicates.yml'
+
+# The job only reads the repository, so grant the token read access only.
+permissions:
+  contents: read
+
+jobs:
+  check-duplicates:
+    name: Check for Duplicate Files
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # Download Git LFS objects instead of leaving pointer files.
+          lfs: true
+
+      - name: Check for duplicates
+        run: ./bin/check-duplicates.sh
diff --git a/.github/workflows/check-zip-validity.yml b/.github/workflows/check-zip-validity.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/check-zip-validity.yml
@@ -0,0 +1,42 @@
+# Runs bin/check-zip-validity.sh when ODT/DOCX/EPUB files or the check change.
+name: Check ZIP Validity
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'documents/**/*.odt'
+      - 'documents/**/*.docx'
+      - 'documents/**/*.epub'
+      - 'bin/check-zip-validity.sh'
+      - '.github/workflows/check-zip-validity.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'documents/**/*.odt'
+      - 'documents/**/*.docx'
+      - 'documents/**/*.epub'
+      - 'bin/check-zip-validity.sh'
+      - '.github/workflows/check-zip-validity.yml'
+
+# The job only reads the repository, so grant the token read access only.
+permissions:
+  contents: read
+
+jobs:
+  check-zip:
+    name: Validate ZIP Structure
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # Download Git LFS objects instead of leaving pointer files.
+          lfs: true
+
+      - name: Install unzip
+        run: sudo apt-get update && sudo apt-get install -y unzip
+
+      - name: Check ZIP validity
+        run: ./bin/check-zip-validity.sh
diff --git a/bin/check-duplicates.sh b/bin/check-duplicates.sh
new file mode 100755
--- /dev/null
+++ b/bin/check-duplicates.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Check for duplicate files in the documents directory
+# SPDX-License-Identifier: MIT
+#
+# Hashes every document under documents/ with SHA-256 and reports each group
+# of files that share a checksum. Exits 0 when no duplicates exist, 1 otherwise.
+
+set -euo pipefail
+
+echo "Checking for duplicate files in documents/..."
+echo ""
+
+TMPFILE=$(mktemp)
+trap 'rm -f "$TMPFILE"' EXIT
+
+# One "<checksum>  <path>" line per file, sorted so equal checksums are adjacent.
+# pipefail makes a failing find (e.g. missing documents/) abort the script.
+find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.pdf" -o -name "*.epub" -o -name "*.md" \) -exec sha256sum {} \; | sort >"$TMPFILE"
+
+# uniq -d keeps one copy of every checksum that occurs more than once.
+DUPLICATES=$(awk '{print $1}' "$TMPFILE" | uniq -d)
+
+if [ -z "$DUPLICATES" ]; then
+  echo "✅ No duplicate files found"
+  exit 0
+fi
+
+echo "❌ Duplicate files detected:"
+echo ""
+
+for CHECKSUM in $DUPLICATES; do
+  echo "Checksum: $CHECKSUM"
+  # The path starts at column 67 (64 hex digits + 2 separator characters);
+  # printing the rest of the line keeps file names that contain spaces intact.
+  grep "^$CHECKSUM" "$TMPFILE" | awk '{print " - " substr($0, 67)}'
+  echo ""
+done
+
+echo "❌ Found duplicate files. Please remove duplicates before committing."
+exit 1
diff --git a/bin/check-zip-validity.sh b/bin/check-zip-validity.sh
new file mode 100755
--- /dev/null
+++ b/bin/check-zip-validity.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Check if ODT/EPUB/DOCX files are valid ZIP archives
+# SPDX-License-Identifier: MIT
+#
+# Runs `unzip -t` on every matching file under documents/. Exits 0 when all
+# archives pass the integrity test, 1 when any fail or documents/ is missing.
+
+set -euo pipefail
+
+echo "Checking if ODT/EPUB/DOCX files are valid ZIP archives..."
+echo ""
+
+# find runs inside a process substitution below, where set -e cannot see its
+# exit status; check the directory up front so a missing tree is not reported
+# as "all valid".
+if [ ! -d documents/ ]; then
+  echo "❌ documents/ directory not found" >&2
+  exit 1
+fi
+
+INVALID=0
+TOTAL=0
+
+# Find all ODT, EPUB, and DOCX files; NUL-delimited so any file name is safe
+while IFS= read -r -d '' FILE; do
+  TOTAL=$((TOTAL + 1))
+
+  if unzip -t "$FILE" >/dev/null 2>&1; then
+    echo "✓ Valid: $FILE"
+  else
+    echo "❌ Invalid: $FILE"
+    INVALID=$((INVALID + 1))
+  fi
+done < <(find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.epub" \) -print0)
+
+echo ""
+echo "Checked $TOTAL files"
+
+if [ "$INVALID" -eq 0 ]; then
+  echo "✅ All files are valid ZIP archives"
+  exit 0
+else
+  echo "❌ Found $INVALID invalid ZIP file(s)"
+  exit 1
+fi