#!/bin/sh

# This is a self extracting archive designed for the TREC 2005 spam
# filtering track. It deletes several directories and files in the current 
# working dir, which will then be replaced.
#
# We construct the archive by cat SFX dbacl-xxx.tar.gz > dbacl-xxx.sfx
#
# This script accepts one optional command line argument. If present,
# we check whether a corresponding file named OPTIONS.zzz is present
# in the TREC subdirectory, where zzz is the argument value. This
# file is used to overwrite the OPTIONS file containing the switches
# for the simulation. In this way, we can self-install several
# variations of the classifier.
#
# If no options are given, we use OPTIONS.default if it exists in the 
# current working directory. Each time a command line option is presented,
# the OPTIONS.default file is (re)created automatically from that argument.
#
# If no options are given, and no OPTIONS.default exists in the current
# working directory, then we present a help message and a list of
# possible OPTIONS.
#
#

# Package name: this script's file name with the ".TREC.sfx.sh" suffix
# removed. The rest of the script expects the archive to unpack into a
# directory of this name. "$0" is quoted so that a path containing
# blanks does not break basename and leave NAME empty.
NAME=$(basename "$0" .TREC.sfx.sh)
# Directory the script was started from; everything is installed here.
W=$PWD
# Line number of the marker line separating this script from the
# gzip'ed tar payload appended to it (-a: treat the binary payload as
# text, -m 1: stop at the first match). Empty if no marker is found.
SKIP=$(grep -a -n -m 1 '^__ARCHIVE_FOLLOWS__' "$0" | sed 's/:.*//')

# usage - print the help text followed by the list of selectable
# option sets.
#
# Globals:  SKIP (read) - line number of the archive marker in "$0"
# Outputs:  help text on stdout, then one line per TREC/OPTIONS.<name>
#           entry found in the embedded archive (only <name> is shown)
# Returns:  exit status of the final sed in the listing pipeline
#
# Defined with the portable "name() { ... }" form because the script
# runs under /bin/sh, where the "function" keyword may not exist.
usage() {
	printf '%s\n' \
		"Usage: $0 [XXX]" \
		"" \
		"Welcome to the dbacl TREC/spam evaluation package." \
		"This script unpacks automatically into the current directory" \
		"a fresh copy of all the files and programs expected by the" \
		"TREC 2005 spamjig (spam filter evaluation system), such as the" \
		"initialize script." \
		"" \
		"If you are seeing this message, then you have yet to select" \
		"which algorithms and runtime options are to be tested in this" \
		"instance of the spamjig test run. All you have to do is rerun" \
		"the present script with the appropriate value of XXX chosen" \
		"from the list below. This will copy a file named OPTIONS.default" \
		"into the current directory which will lock your chosen options" \
		"for all required scripts." \
		"" \
		"You can change options later by rerunning this script with" \
		"another value of XXX, or even edit the OPTIONS.default file" \
		"directly if you know what you are doing." \
		"" \
		"Possible values for XXX:"

	# The payload starts on the line after the marker. "tail -n +N" is
	# the current spelling of the obsolete "tail +N", and "-f -" makes
	# tar read the pipe regardless of its built-in default device.
	tail -n +"$((SKIP + 1))" "$0" | gunzip -c | tar tf - \
		| grep 'TREC/OPTIONS\.' | sed 's/^.*OPTIONS\.//'

}

# ----------------------------------------------------------------------
# Main sequence: pick the options variant, unpack the embedded archive,
# build and install dbacl under $W, copy the spamjig scripts into $W
# and run a small training pass as a smoke test.
# Reads NAME, W and SKIP set at the top of the file.
# Exits 0 on success and 1 on any detected failure.
# ----------------------------------------------------------------------

# Optional first argument: suffix of the OPTIONS.<variant> file to use.
OPTARG=$1
if [ -z "$OPTARG" ]; then
	# No variant requested: reuse a previously saved selection from the
	# working directory, or show the help text if there is none.
	OPTARG=default
	if [ ! -e "$W/OPTIONS.$OPTARG" ]; then
		usage
		exit 1
	fi
fi

# Nothing is deleted unless the payload offset is a plain number;
# otherwise the old tree would be removed and nothing could replace it.
case "$SKIP" in
	'' | *[!0-9]*)
		echo "Cannot locate the embedded archive inside $0." >&2
		exit 1
		;;
esac

echo ""
echo "Installing $NAME - please wait...."
echo ""

# ${NAME:?} aborts if NAME is empty, so this can never expand to "$W/"
# and wipe the working directory itself.
rm -rf "${W:?}/${NAME:?}"
# The payload starts on the line after the marker. "tail -n +N" is the
# current spelling of the obsolete "tail +N"; "-f -" makes tar read the
# pipe regardless of its built-in default device.
tail -n +"$((SKIP + 1))" "$0" | gunzip -c | tar xf -

if [ ! -d "$W/$NAME" ]; then
	echo "There was a problem while extracting the archive."
	exit 1
fi

# OPTARG is never empty here (it was defaulted above), so the variant
# lookup always runs: first inside the unpacked tree, then in $W.
if [ -e "$W/$NAME/TREC/OPTIONS.$OPTARG" ]; then
	cat "$W/$NAME/TREC/OPTIONS.$OPTARG" > "$W/$NAME/TREC/OPTIONS"
	# Remember the selection for later runs without an argument.
	cp -f "$W/$NAME/TREC/OPTIONS.$OPTARG" "$W/OPTIONS.default"
else
	echo "No OPTIONS.$OPTARG in $W/$NAME/TREC, trying working directory..."
	if [ -e "$W/OPTIONS.$OPTARG" ]; then
		echo "Found $W/OPTIONS.$OPTARG."
		cat "$W/OPTIONS.$OPTARG" > "$W/$NAME/TREC/OPTIONS"
	else
		echo "Could not find $W/OPTIONS.$OPTARG, using defaults."
	fi
fi

echo "Installing these options:"
echo "---------"
cat "$W/$NAME/TREC/OPTIONS"
echo "---------"

cd "$W/$NAME" || {
	echo "Cannot enter $W/$NAME." >&2
	exit 1
}

# The exit status of the build chain is checked as well as the presence
# of the binary: a dbacl left behind by an earlier run must not make a
# failed build look like a success.
if ./configure "--prefix=$W" && make && make check && make install-strip \
	&& [ -x "$W/bin/dbacl" ]; then

	# Put fresh, executable copies of the installed TREC scripts
	# directly into the working directory.
	for f in initialize finalize classify train; do
		rm -f "$W/$f" && cp "$W/share/dbacl/TREC/$f" "$W/$f"
		chmod u+x "$W/$f"
	done

else
	echo "Installation did NOT complete successfully."
	exit 1
fi

cd "$W" || {
	echo "Cannot return to $W." >&2
	exit 1
}

# Smoke test: start from an empty db directory and no stderr.log, train
# both categories on the bundled sample, then compare stderr.log with
# the reference copy shipped in the package.
# NOTE(review): stderr.log is presumably written by the train script;
# confirm against share/dbacl/TREC/train.
rm -rf "$W/db" && mkdir "$W/db"
rm -f "$W/stderr.log"
"$W/train" ham "$W/share/dbacl/TREC/basic-email"
"$W/train" spam "$W/share/dbacl/TREC/basic-email"

if ! cmp -s "$W/stderr.log" "$W/share/dbacl/TREC/verify-stderr"; then
	echo "Basic learning failed. See stderr.log below:"
	echo "---"
	cat "$W/stderr.log"
	exit 1
fi

echo "Done!"

exit 0

# no extra characters allowed after this line!
__ARCHIVE_FOLLOWS__
