Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/check-duplicates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Runs bin/check-duplicates.sh when anything under documents/, the script
# itself, or this workflow file changes: on pushes to main and on pull
# requests targeting main.
name: Check Duplicates

on:
  push:
    branches: [main]
    paths:
      - 'documents/**'
      - 'bin/check-duplicates.sh'
      - '.github/workflows/check-duplicates.yml'
  pull_request:
    branches: [main]
    paths:
      - 'documents/**'
      - 'bin/check-duplicates.sh'
      - '.github/workflows/check-duplicates.yml'

# The job only checks out and reads the repository, so limit the
# GITHUB_TOKEN to read access instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  check-duplicates:
    name: Check for Duplicate Files
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Download Git LFS objects as part of the checkout; presumably so
          # the script hashes real file contents rather than LFS pointer
          # files (confirm the documents are LFS-tracked).
          lfs: true

      - name: Check for duplicates
        run: ./bin/check-duplicates.sh
36 changes: 36 additions & 0 deletions .github/workflows/check-zip-validity.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Runs bin/check-zip-validity.sh when an .odt/.docx/.epub file under
# documents/, the script itself, or this workflow file changes: on pushes to
# main and on pull requests targeting main.
name: Check ZIP Validity

on:
  push:
    branches: [main]
    paths:
      - 'documents/**/*.odt'
      - 'documents/**/*.docx'
      - 'documents/**/*.epub'
      - 'bin/check-zip-validity.sh'
      - '.github/workflows/check-zip-validity.yml'
  pull_request:
    branches: [main]
    paths:
      - 'documents/**/*.odt'
      - 'documents/**/*.docx'
      - 'documents/**/*.epub'
      - 'bin/check-zip-validity.sh'
      - '.github/workflows/check-zip-validity.yml'

# The job only checks out and reads the repository, so limit the
# GITHUB_TOKEN to read access instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  check-zip:
    name: Validate ZIP Structure
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Download Git LFS objects as part of the checkout; presumably so
          # unzip tests the real archives rather than LFS pointer files
          # (confirm the documents are LFS-tracked).
          lfs: true

      # The validity script shells out to `unzip -t`, so make sure the tool
      # is present on the runner before invoking it.
      - name: Install unzip
        run: sudo apt-get update && sudo apt-get install -y unzip

      - name: Check ZIP validity
        run: ./bin/check-zip-validity.sh
36 changes: 36 additions & 0 deletions bin/check-duplicates.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# Check for duplicate files in the documents directory
# SPDX-License-Identifier: MIT
#
# Hashes every .odt/.docx/.pdf/.epub/.md file under documents/ with sha256sum
# and lists each group of files that share a checksum.
#
# Exit status:
#   0  no two matching files share a checksum
#   1  at least one checksum is shared by two or more files

set -euo pipefail

echo "Checking for duplicate files in documents/..."
echo ""

# Scratch file holding "<checksum>  <path>" lines; removed on any exit.
TMPFILE=$(mktemp)
trap 'rm -f "$TMPFILE"' EXIT

# Batch files into as few sha256sum invocations as possible (-exec ... +).
# With "+", find reports a non-zero status when sha256sum fails, so together
# with pipefail an unreadable file stops the check instead of being skipped.
# Sorting puts identical checksums on adjacent lines, which uniq -d requires.
find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.pdf" -o -name "*.epub" -o -name "*.md" \) -exec sha256sum {} + | sort >"$TMPFILE"

# Checksums that occur on more than one line.
DUPLICATES=$(awk '{print $1}' "$TMPFILE" | uniq -d)

if [ -z "$DUPLICATES" ]; then
    echo "✅ No duplicate files found"
    exit 0
fi

echo "❌ Duplicate files detected:"
echo ""

# Word splitting is intentional here: checksums never contain whitespace.
for CHECKSUM in $DUPLICATES; do
    echo "Checksum: $CHECKSUM"
    # Drop the checksum column and its separator, then print the rest of the
    # line, so paths containing spaces are shown in full rather than being
    # cut at the first space.
    grep "^$CHECKSUM" "$TMPFILE" | awk '{ sub(/^[^ ]+ +/, ""); print " - " $0 }'
    echo ""
done

# Reaching this point means DUPLICATES was non-empty, so always fail.
echo "❌ Found duplicate files. Please remove duplicates before committing."
exit 1
34 changes: 34 additions & 0 deletions bin/check-zip-validity.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Check if ODT/EPUB/DOCX files are valid ZIP archives
# SPDX-License-Identifier: MIT
#
# Runs `unzip -t` against every .odt/.docx/.epub file under documents/ and
# prints one result line per file followed by a summary.
#
# Exit status:
#   0  every checked file passed the unzip test (also when none were found)
#   1  at least one file failed the unzip test

set -euo pipefail

printf '%s\n\n' "Checking if ODT/EPUB/DOCX files are valid ZIP archives..."

bad_count=0
seen_count=0

# Succeeds when unzip's archive test passes for the file given as $1.
# unzip's own output is discarded; only its exit status is used.
archive_ok() {
    unzip -t "$1" >/dev/null 2>&1
}

# Paths arrive NUL-delimited so names with spaces or newlines stay intact.
# The loop is fed by process substitution rather than a pipe so the counters
# are updated in this shell and remain visible after the loop.
while IFS= read -r -d '' archive; do
    seen_count=$((seen_count + 1))

    if ! archive_ok "$archive"; then
        echo "❌ Invalid: $archive"
        bad_count=$((bad_count + 1))
        continue
    fi

    echo "✓ Valid: $archive"
done < <(find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.epub" \) -print0)

printf '\n%s\n' "Checked $seen_count files"

if [ "$bad_count" -ne 0 ]; then
    echo "❌ Found $bad_count invalid ZIP file(s)"
    exit 1
fi

echo "✅ All files are valid ZIP archives"
exit 0