Files
rag-solution/unzip_archives.sh

145 lines
6.1 KiB
Bash
Executable File

#!/bin/bash
# Script to unpack archives found anywhere under the data folder.
#
# Every archive is extracted into the directory it lives in. Afterwards the
# archive file itself is moved away:
#   * extracted successfully            -> $UNPACKED_DIR
#   * invalid / unsupported / failed    -> $BROKEN_DIR
#
# Zip files are handled first, then the remaining formats one extension at a
# time (rar, 7z, tar.gz, tgz, tar.xz, tar.bz2, gz, xz, bz2, tar). Each pass
# runs its own `find`, so archives unpacked by an earlier pass can be picked
# up by a later one.
set -euo pipefail # Exit on errors, unset variables and failed pipelines

DATA_DIR="./data"
UNPACKED_DIR="./data-unpacked-archives"
BROKEN_DIR="./data-broken-archives"

#######################################
# Move an archive into a destination directory without overwriting a file of
# the same name that is already there (archives from different sub-folders
# frequently share a basename). On a clash a numeric suffix is appended:
# name, name.1, name.2, ...
# Arguments: $1 - path of the archive to move
#            $2 - destination directory (must exist)
# Returns:   exit status of mv
#######################################
stash_archive() {
  local src=$1 dest_dir=$2
  local name=${src##*/}
  local target="${dest_dir}/${name}"
  local n=1
  while [[ -e "$target" ]]; do
    target="${dest_dir}/${name}.${n}"
    n=$((n + 1))
  done
  mv -- "$src" "$target"
}

#######################################
# Validate a non-zip archive and unpack it next to itself.
# Tarballs are validated with `tar -t` (a healthy compression layer does not
# guarantee a readable tar); single-file gz/bz2/xz streams are validated with
# the compressor's own -t. Single-file streams are decompressed with -dc into
# a sibling file (overwriting it, like `unzip -o` and tar do) so the archive
# itself survives and can still be moved by the caller.
# Arguments: $1 - extension the archive was matched by (e.g. "tar.gz", "gz")
#            $2 - path of the archive
# Returns:   0 if the archive was extracted, non-zero otherwise
#######################################
extract_in_place() {
  local ext=$1 archive=$2
  local dir
  dir=$(dirname "$archive")
  case "$ext" in
    rar)
      unrar l "$archive" >/dev/null 2>&1 || return 1
      unrar x "$archive" "$dir"/
      ;;
    7z)
      7z l "$archive" >/dev/null 2>&1 || return 1
      7z x "$archive" -o"$dir"/
      ;;
    tar.gz | tgz)
      tar -tzf "$archive" >/dev/null 2>&1 || return 1
      tar -xzf "$archive" -C "$dir"/
      ;;
    tar.bz2)
      tar -tjf "$archive" >/dev/null 2>&1 || return 1
      tar -xjf "$archive" -C "$dir"/
      ;;
    tar.xz)
      tar -tJf "$archive" >/dev/null 2>&1 || return 1
      tar -xJf "$archive" -C "$dir"/
      ;;
    tar)
      tar -tf "$archive" >/dev/null 2>&1 || return 1
      tar -xf "$archive" -C "$dir"/
      ;;
    gz)
      gzip -t "$archive" >/dev/null 2>&1 || return 1
      gzip -dc "$archive" >"${archive%.gz}"
      ;;
    bz2)
      bzip2 -t "$archive" >/dev/null 2>&1 || return 1
      bzip2 -dc "$archive" >"${archive%.bz2}"
      ;;
    xz)
      xz -t "$archive" >/dev/null 2>&1 || return 1
      xz -dc "$archive" >"${archive%.xz}"
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Extract every *.zip under DATA_DIR in place and file the archive away.
# Globals: DATA_DIR, UNPACKED_DIR, BROKEN_DIR (read)
# Outputs: progress messages on stdout
#######################################
process_zip_archives() {
  local archive dir
  # The file list is NUL-delimited and arrives on fd 3: names keep their
  # whitespace, and extractors that read stdin cannot swallow pending paths.
  while IFS= read -r -d '' -u 3 archive; do
    echo "Processing: $archive"
    # Check if the zip file is valid and not password protected
    if unzip -t "$archive" >/dev/null 2>&1; then
      echo " Archive is valid, extracting..."
      # Extract the archive in the same directory where it's located
      dir=$(dirname "$archive")
      if unzip -o "$archive" -d "$dir"; then
        stash_archive "$archive" "$UNPACKED_DIR"
        echo " Successfully extracted and moved to $UNPACKED_DIR"
      else
        # A failed extraction must not abort the whole run under `set -e`.
        stash_archive "$archive" "$BROKEN_DIR"
        echo " Extraction failed, moved to $BROKEN_DIR"
      fi
    else
      echo " Archive is invalid, password-protected, or in unsupported format"
      stash_archive "$archive" "$BROKEN_DIR"
      echo " Moved to $BROKEN_DIR"
    fi
  done 3< <(find "$DATA_DIR" -type f -name "*.zip" -print0)
}

#######################################
# Handle the other common archive formats, one extension per pass.
# Compound extensions come before their plain counterparts (tar.gz before
# gz, ...) so tarballs are gone by the time the single-file passes run.
# Globals: DATA_DIR, UNPACKED_DIR, BROKEN_DIR (read)
# Outputs: progress messages on stdout
#######################################
process_other_archives() {
  local ext archive label tool
  for ext in rar 7z tar.gz tgz tar.xz tar.bz2 gz xz bz2 tar; do
    # label: how the format is named in messages.
    # tool:  optional extractor whose absence is reported explicitly.
    case "$ext" in
      rar)
        label="RAR"
        tool="unrar"
        ;;
      7z)
        label="7z"
        tool="7z"
        ;;
      tar)
        label="TAR"
        tool=""
        ;;
      *)
        label=$ext
        tool=""
        ;;
    esac
    while IFS= read -r -d '' -u 3 archive; do
      echo "Processing: $archive (non-zip format)"
      if [[ -n "$tool" ]] && ! command -v "$tool" >/dev/null 2>&1; then
        stash_archive "$archive" "$BROKEN_DIR"
        echo " $tool not available, moved to $BROKEN_DIR"
      elif extract_in_place "$ext" "$archive"; then
        stash_archive "$archive" "$UNPACKED_DIR"
        echo " Successfully extracted $label and moved to $UNPACKED_DIR"
      else
        stash_archive "$archive" "$BROKEN_DIR"
        echo " Could not process $label, moved to $BROKEN_DIR"
      fi
    done 3< <(find "$DATA_DIR" -type f -name "*.$ext" -print0)
  done
}

main() {
  # Create destination directories if they don't exist
  mkdir -p "$UNPACKED_DIR" "$BROKEN_DIR"
  process_zip_archives
  process_other_archives
  echo "Processing complete!"
}

main