#!/bin/sh
#
#	apt-walkabout clean-by-exclusion
#
#	Cleans archives from the apt-walkabout cache that are not
#	required by any of the known systems.  It works by generating
#	a list of archives that are required, and then removing the
#	archives in the cache that are not in the required set, in
#	descending order by access time, until the user-specified
#	limit (in kilobytes) is reached.
#
#	Run this on any system.
#	Run this after several successful upgrade cycles.
#	Run this when your apt-walkabout cache is too large.
#
# Abort on the first command that fails.
set -e

# Pull in configuration: the system-wide file first, then the copy found
# relative to the current directory, so assignments in the second file
# win.  A file that is not readable is silently skipped.
[ ! -r /etc/apt-walkabout.conf ] || . /etc/apt-walkabout.conf

[ ! -r etc/apt-walkabout.conf ] || . etc/apt-walkabout.conf

# Work out how many kilobytes of archives may be removed.  Sources, in
# order of precedence: the first command line argument, then
# APT_WALKABOUT_CLEAN_LIMIT (e.g. set by a configuration file), then an
# interactive prompt.
LIMIT=${1:-${APT_WALKABOUT_CLEAN_LIMIT:-}}

if [ -z "${LIMIT}" ]; then
    # printf rather than `echo -n`, which is not portable under /bin/sh
    printf '%s' "remove how many kilobytes of archives [none] ? "
    # an empty answer, or end of input, means "remove nothing"
    read -r LIMIT || true
    if [ -z "${LIMIT}" ]; then
        exit
    fi
fi

# The limit is compared numerically against archive sizes further down;
# refuse anything that is not a plain whole number before doing any work.
case "${LIMIT}" in
    *[!0-9]*)
        printf '%s: limit must be a whole number of kilobytes, got "%s"\n' \
            "${0##*/}" "${LIMIT}" >&2
        exit 2
        ;;
esac

# Private scratch directory.  mktemp chooses an unpredictable name and
# creates the directory itself, unlike a guessable /tmp/apt-walkabout.<pid>
# that another user could create first.
HERE=$(mktemp -d /tmp/apt-walkabout.XXXXXX) || exit 1

# Remove the scratch directory however the script ends, including an
# abort caused by `set -e` or by a signal.
trap 'rm -rf -- "${HERE}"' EXIT
trap 'exit 1' HUP INT TERM

TMP_0="${HERE}/clean-by-exclusion.tmp.0.status"
TMP_1="${HERE}/clean-by-exclusion.tmp.1.required"
TMP_2="${HERE}/clean-by-exclusion.tmp.2.unique"
TMP_3="${HERE}/clean-by-exclusion.tmp.3"

# begin with empty list of required archives (the file is created so that
# it exists even when no system contributes anything to it)
: > "${TMP_1}"

# create a temporary archives tree in /tmp (else apt barfs)
mkdir -p "${HERE}/partial"

# status_system_name PATH
# Print the system name encoded in a status file path of the form
# DIR/status.NAME.gz.  Only the fixed prefix and suffix are stripped, so a
# NAME that itself contains dots (e.g. a fully qualified host name) is
# kept whole.
status_system_name() {
    _ssn=${1##*/}
    _ssn=${_ssn#status.}
    printf '%s\n' "${_ssn%.gz}"
}

printf '%s' "Finding requirements ..."

# for each of the systems we know
for status_gz in var/lib/dpkg/status.*.gz; do
    # when nothing matches, the pattern itself is left in the list
    [ -f "${status_gz}" ] || continue
    name=$(status_system_name "${status_gz}")

    # system name in red; printf because `echo -e` and the \e escape are
    # not available in every /bin/sh
    printf '\033[31m %s\033[m' "${name}"

    # extract the status file from our list of them
    gzip --decompress --stdout "${status_gz}" > "${TMP_0}"

    # Ask apt which archives a dist-upgrade of that system would fetch.
    # The output goes to a file first so that a failing apt-get is
    # noticed: in a pipeline its exit status would be lost, and the
    # archives of that system would wrongly be treated as not required.
    uris="${TMP_0}.uris"
    apt-get \
        -o Dir="." \
        -o "Dir::Etc::SourceList=${PWD}/etc/apt/sources.list" \
        -o "Dir::State::Lists=${PWD}/var/lib/apt/lists/" \
        -o "Dir::State::status=${TMP_0}" \
        -o "Dir::Cache::Archives=${HERE}" \
        -o "Dir::Cache::srcpkgcache=${HERE}/srcpkgcache.bin" \
        -o "Dir::Cache::pkgcache=${HERE}/pkgcache.bin" \
        -o Debug::NoLocking=true \
        --print-uris --yes dist-upgrade > "${uris}" || {
        printf '\n%s: apt-get failed for system "%s"; the list of required archives would be incomplete, giving up\n' \
            "${0##*/}" "${name}" >&2
        exit 1
    }

    # grab the uris, keep the file names only (second field); grep finding
    # nothing is not an error, it means this system needs no archives
    cut -f2 -d' ' "${uris}" | (grep '\.deb$' >> "${TMP_1}" || true)

done

echo " ... done."
# select_within_limit LIMIT
# Read "ATIME KBLOCKS NAME" lines on stdin and print NAME for every line
# for which the running total of KBLOCKS is still below LIMIT.  LIMIT is
# handed to awk with -v instead of being pasted into the program text, so
# a malformed value cannot change the awk program; `+ 0` forces a numeric
# comparison.
select_within_limit() {
    awk -v limit="$1" '{ size += $2; if (size < limit + 0) print $3 }'
}

printf '%s' "Inverting ..."

# remove the temporary archives tree
rmdir "${HERE}/partial"

# reduce the archive file names required to a unique list; if no list was
# written at all, nothing is required
if [ -f "${TMP_1}" ]; then
    sort -u "${TMP_1}" > "${TMP_2}"
else
    : > "${TMP_2}"
fi

# generate list of archives that are NOT in the required list
# ordered by access time, largest %A@ (most recent access) first (sort -rn)
# NOTE(review): the previous comment here said "least accessed first",
# which is not what sort -rn yields -- confirm which order is intended
# until cumulative size limit reached (%k and awk)
# -maxdepth 1: only the base name (%f) is recorded, so an archive in a
# subdirectory could not be addressed from this directory later on
cd var/cache/apt/archives
find . -maxdepth 1 -name "*.deb" -printf "%A@ %k %f\n" | \
    grep --invert-match --fixed-strings --file "${TMP_2}" | \
    sort -rn | \
    select_within_limit "${LIMIT}" > "${TMP_3}"
echo " done."

# The candidate list holds one archive name per line, so counting lines
# counts files.
COUNT=$(wc -l < "${TMP_3}")
echo "Number of files: $COUNT"

if [ "$COUNT" != "0" ]; then

    # The names were printed from a single awk field and therefore hold no
    # whitespace: the deliberately unquoted $(cat ...) below splits into
    # exactly one word per archive.  `--` keeps a name from being read as
    # an option.
    echo "Files and sizes: "
    du -- $(cat "${TMP_3}") | sort -n
    echo ""

    printf '%s' "Total size: "
    # the grand total is the last line du prints; taking it by position
    # avoids also matching archives whose name contains "total"
    du --total --human-readable -- $(cat "${TMP_3}") | tail -n 1 | awk '{print $1}'

    printf '%s' "Press enter to remove these files ? "
    # end of input is not a confirmation: clean up and stop without
    # removing anything
    read -r pause || {
        echo ""
        echo "No confirmation, nothing removed." >&2
        rm -rf -- "${HERE:?}"
        exit 1
    }

    # remove the files
    rm -- $(cat "${TMP_3}")

    echo "Done."

else
    echo "Nothing to clean, successful."
fi

# drop the scratch directory; :? refuses to run with an empty path
rm -rf -- "${HERE:?}"
