#!/bin/bash
# Simple shell script for importing a collection of Debian source packages
# into a git repository.
#
# Copyright(C) 2007, 2008, Ron <ron@debian.org>
# This script is distributed according to the terms of the GNU GPL.

# Stop at the first command that fails, rather than carrying on importing
# on top of a broken intermediate state.
set -e

# Branch that the pristine orig.tar.gz content is committed to, and the
# branch that the Debian diffs (or native package tarballs) are committed to.
UPSTREAM_BRANCH="upstream"
DEBIAN_BRANCH="master"

# Prefixes put in front of the upstream version and of the full Debian
# version respectively to form the names of the tags created for each import.
UPSTREAM_TAG_PREFIX="v"
DEBIAN_TAG_PREFIX="v"

# We default to the linux kernel style tags, but people who prefer the
# git-buildpackage style and like slashes in their tags can do this:
#UPSTREAM_TAG_PREFIX="$UPSTREAM_BRANCH/"
#DEBIAN_TAG_PREFIX="debian/"


# Show the command line help on stderr and then terminate the script.
#
#   $1 - the exit status to terminate with.
usage()
{
    local status=$1

    cat >&2 <<EOF

git-debimport [-v|--verbose] [--fetch] [--late-merge] path-prefix

 This program will create a git repository of all files that match
 \${path-prefix}_*.diff.gz (with their corresponding orig.tar.gz),
 or of all files that match \${path-prefix}_*.tar.gz (for Debian
 native packages).  If the --fetch option is supplied it will try
 to download all available versions from snapshot.debian.net rather
 than use an existing set of packages.

  For example:
  $ mkdir mydestdir && cd mydestdir
  $ git-debimport ../mysrcdir/mypackagename

 If the --fetch option is used in the example above, then all versions
 of mypackagename will be downloaded into mysrcdir prior to creating a
 git repository from them.

 The --late-merge option if supplied will delay merging new upstream
 releases into the debian branch until all packages are imported.  This
 may be useful where merge conflicts would need to be manually resolved
 that we know will already be resolved by the next diff.gz imported.
 This will result in a poorer quality history in the repository, however
 every package imported will still be able to be extracted correctly from
 the tags that define it.  This option has no effect upon native package
 imports (as there will be only one branch in that case).

EOF

    exit $status
}

# This function replaces all consecutive illegal constructs in a git refname
# with a single '_'.  The rules for git refnames are described in the manual
# page for git-check-ref-format(1).
#
#   $1 - the candidate ref name (a tag prefix followed by a package version).
#
# The sanitised name is written to stdout.  All the work is done in a subshell
# so that neither the extglob setting nor the scratch variable leak out into
# the caller.
sanitise_git_ref()
{
    (
	# Needed for the +(...) patterns used below.
	shopt -s extglob

	ref="${1//\/.//_}"				# no component may begin with '.'
	ref="${ref//../_}"				# no '..' anywhere
	ref="${ref//[[:cntrl:][:space:]:~^?*[]/_}"	# no control chars, space, : ~ ^ ? * [
	ref="${ref//\\/_}"				# no backslash either
	ref="${ref%%+(/)}"				# no trailing '/'

	# A ref may not end with '.lock' nor with a '.'.  Keep the text but
	# replace the offending dot, like every other illegal construct.
	case "$ref" in *.lock) ref="${ref%.lock}_lock" ;; esac
	case "$ref" in *.) ref="${ref%.}_" ;; esac

	# Collapse any run of '_' that the steps above left into a single one.
	ref="${ref//+(_)_/_}"

	printf '%s\n' "$ref"
    )
}


# Command line parsing.
#
# The option state is reset explicitly first, so that a variable of the same
# name exported in the caller's environment cannot silently switch an option
# on or supply a package path.
verbose=""
fetch_snapshots=""
late_merge=""
package_path=""

for arg; do
    case "$arg" in
	-v|--verbose)
	    verbose="--verbose"
	    ;;

	--fetch)
	    fetch_snapshots="yes"
	    ;;

	--late-merge)
	    late_merge="yes"
	    ;;

	-*)
	    # Diagnostics go to stderr, the same place usage() writes to.
	    echo "ERROR: Unrecognised option '$arg'" 1>&2
	    usage 1
	    ;;

	*)
	    if [ -n "$package_path" ]; then
		echo "ERROR: Only one package path expected" 1>&2
		usage 1
	    fi
	    package_path="$arg"
	    ;;
    esac
done

# The path-prefix argument is mandatory.
[ -n "$package_path" ] || usage 1


# Split the path-prefix into the directory that holds the package files and
# the source package name that all of their file names start with.
PACKAGES_DIR="$(dirname -- "$package_path")"
PACKAGE_NAME="$(basename -- "$package_path")"

# The import itself runs from inside the new repository directory, one level
# below the current one, so a relative PACKAGES_DIR needs a "../" prefix to
# still be reachable from there.  An absolute one is used as it is.
PKG_ROOT=""
case "$PACKAGES_DIR" in /*) ;; *) PKG_ROOT="../" ;; esac

if [ -e "$PACKAGE_NAME" ]; then
    echo "A $PACKAGE_NAME dir already exists, please (re)move it first"
    exit 1
fi

# Set PACKAGE_DIFFS to the sorted list of diff.gz files for this package and
# PACKAGE_TARS to the sorted list of its tar.gz files that are not an
# orig.tar.gz (ie. native package tarballs).  Errors from find are discarded
# on purpose, PACKAGES_DIR may not even exist yet when --fetch is used.
find_package_files()
{
    PACKAGE_DIFFS="$(find "$PACKAGES_DIR" -type f -name "${PACKAGE_NAME}_*.diff.gz" 2>/dev/null | sort)"
    PACKAGE_TARS="$(find "$PACKAGES_DIR" -type f -name "${PACKAGE_NAME}_*.tar.gz" \! -name "*.orig.tar.gz" 2>/dev/null | sort)"
}

find_package_files

if [ -z "$PACKAGE_DIFFS" ] && [ -z "$PACKAGE_TARS" ]; then

    if [ -z "$fetch_snapshots" ]; then
	echo "No ${package_path}_* diff.gz or tar.gz files found, aborting."
	exit 1
    fi

    DEBSNAP="$(command -v debsnap || true)"
    if [ -z "$DEBSNAP" ]; then
	echo "debsnap not found, unable to fetch files."
	exit 1
    fi

    # Collect the exit status by hand, a failure here must not trip set -e
    # since status 2 is treated as a partial success below.
    ret=0
    "$DEBSNAP" ${verbose:+"$verbose"} --destdir "$PACKAGES_DIR" "$PACKAGE_NAME" || ret=$?

    case "$ret" in
	0) ;;
	2)
	    echo "WARNING: some files failed to be fetched"
	    ;;
	*)
	    echo "ERROR: return code $ret from $DEBSNAP, aborting."
	    exit 1
	    ;;
    esac

    find_package_files

    if [ -z "$PACKAGE_DIFFS" ] && [ -z "$PACKAGE_TARS" ]; then
	echo "No packages were able to be fetched, aborting."
	exit 1
    fi

elif [ -n "$fetch_snapshots" ]; then

    echo "Package files already exist under $PACKAGES_DIR"
    echo "Please (re)move them first if you wish to --fetch snapshots."
    exit 1

fi

if [ -n "$PACKAGE_DIFFS" ] && [ -n "$PACKAGE_TARS" ]; then

    echo "A mix of native and non-native package exist in $PACKAGES_DIR"
    echo "That case isn't handled yet, sorry.  Patches welcome if you need it."
    exit 1

fi


# Create the new repository in a directory named after the package, and do
# all further work from inside of it.
mkdir "$PACKAGE_NAME"
cd "$PACKAGE_NAME"
git init

# The import later checks out $DEBIAN_BRANCH by name, so don't rely on git's
# idea of the default initial branch (init.defaultBranch may differ).  HEAD is
# still unborn at this point, so simply point it at the branch we want.
git symbolic-ref HEAD "refs/heads/$DEBIAN_BRANCH"

# Deliberately expanded unquoted where it is used, it is a command plus option.
COMPARE="dpkg --compare-versions"


# Insertion sort the native package tarballs given as arguments into the
# order that dpkg puts their versions in.  The sorted list is left in the
# global array P.
#
# Only the version part of each file name (what follows the last '_', less
# the .tar.gz suffix) is handed to $COMPARE, never the whole path.  The index
# is stepped with plain arithmetic expansion, since a ((--j)) that evaluates
# to zero returns a failure status which would trip set -e.
sort_native_tars()
{
    local i j cur prev

    P=( "$@" )

    for (( i = 1; i < ${#P[@]}; ++i )); do
	cur="${P[i]%.tar.gz}"
	cur="${cur##*_}"

	j=$i
	while (( j > 0 )); do
	    prev="${P[j-1]%.tar.gz}"
	    prev="${prev##*_}"
	    $COMPARE "$prev" gt "$cur" || break
	    j=$(( j - 1 ))
	done

	# Move element i down to slot j, shifting elements j .. i-1 up by one.
	(( i == j )) || P=( "${P[@]:0:j}" "${P[i]}" "${P[@]:j:i-j}" "${P[@]:i+1}" )
    done
}

if [ -n "$PACKAGE_TARS" ]; then

    # See below for details on sorting the package order ...
    sort_native_tars $PACKAGE_TARS
    PACKAGE_TARS="${P[*]}"

    for f in "${P[@]}"; do

	PACKAGE_VERSION="${f%.tar.gz}"
	PACKAGE_VERSION="${PACKAGE_VERSION##*_}"

	echo "Importing $PACKAGES_DIR/${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz"

	# Replace everything in the work tree, except the repository itself,
	# with the content of this version.
	find . -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
	tar -xf "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz" \
	    --strip 1

	# Shouldn't be needed here, but just in case ...
	chmod 755 debian/rules

	# Take the commit date and identity from the top changelog entry.
	# As before, a failure of dpkg-parsechangelog itself is not fatal.
	changelog="$(dpkg-parsechangelog || true)"
	maintainer="$(printf '%s\n' "$changelog" | sed -n 's/Maintainer: //p')"
	DATE="$(printf '%s\n' "$changelog" | sed -n 's/Date: //p')"
	AUTHOR="$(printf '%s\n' "$maintainer" | cut -d\< -f1)"
	EMAIL="<$(printf '%s\n' "$maintainer" | cut -d\< -f2)"

	git add .

	# 'git status -a' stopped being a dry-run commit in git 1.7.0 and is
	# rejected outright by current versions, so ask for the machine
	# readable status instead: any output means there is work to commit.
	if [ -n "$(git status --porcelain)" ]; then
	    GIT_AUTHOR_NAME="$AUTHOR" GIT_COMMITTER_NAME="$AUTHOR" \
	    GIT_AUTHOR_EMAIL="$EMAIL" GIT_COMMITTER_EMAIL="$EMAIL" \
	    GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
		git commit -a -m "git-debimport ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz"
	else
	    echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz"
	fi
	git tag "$(sanitise_git_ref "$DEBIAN_TAG_PREFIX$PACKAGE_VERSION")"

    done

    echo "All done!"
    exit 0
fi


# We really need the packages in the order dpkg thinks they are in here, and
# the only way to reliably know that is to ask dpkg what it thinks.  Since that
# is a rather expensive operation, and the number of operations to be performed
# grows rapidly as the number of packages to import gets longer, we must do the
# only sane thing feasible, and cheat.
#
# By doing a fast lexical pre-sort of the list, we can in almost all but the
# most pathological cases get the order almost, or even exactly, right.  So from
# that probable starting point, Simplicity, in her infinite wisdom, will reward
# all those people who numbered their packages sanely, with O(n) or near to it
# performance in determining the Proper order if we just do a trivial insertion
# sort for this next step.
#
# Pros:
# The Good People will get better results than the most fancy patent pending
# product of a college education algorithm is likely to do, and the Bad People
# will get the time-squared in purgatory that they deserve.
#
# Cons:
# Had you inferred all this from the 4 lines of code below?
#
# Todo:
# Maybe output a warning that this could take a while if $count > N.
# Determine N.

# Insertion sort the diff.gz files given as arguments into the order that
# dpkg puts their versions in.  The sorted list is left in the global array P.
#
# Only the version part of each file name (what follows the last '_', less
# the .diff.gz suffix) is handed to $COMPARE, never the whole path.  The index
# is stepped with plain arithmetic expansion, since a ((--j)) that evaluates
# to zero returns a failure status which would trip set -e.
sort_package_diffs()
{
    local i j cur prev

    P=( "$@" )

    for (( i = 1; i < ${#P[@]}; ++i )); do
	cur="${P[i]%.diff.gz}"
	cur="${cur##*_}"

	j=$i
	while (( j > 0 )); do
	    prev="${P[j-1]%.diff.gz}"
	    prev="${prev##*_}"
	    $COMPARE "$prev" gt "$cur" || break
	    j=$(( j - 1 ))
	done

	# Move element i down to slot j, shifting elements j .. i-1 up by one.
	(( i == j )) || P=( "${P[@]:0:j}" "${P[i]}" "${P[@]:j:i-j}" "${P[@]:i+1}" )
    done
}

# Word splitting of the find output is intended here.
sort_package_diffs $PACKAGE_DIFFS

PACKAGE_DIFFS="${P[*]}"

# Scratch directory, created beside the new repository, which holds the
# unpacked orig.tar.gz that the diffs currently being imported apply to.
CACHE_DIR="../${PACKAGE_NAME}-import-cache"

# Unpack ${PACKAGE_NAME}_$1.orig.tar.gz into a fresh import cache, hard link
# its content into the (already emptied) work tree, and commit the result to
# the branch that is currently checked out.
#
#   $1 - the upstream version to import.
import_orig_tarball()
{
    local version="$1"
    local tarball="$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${version}.orig.tar.gz"
    local orig_dir="$CACHE_DIR/${PACKAGE_NAME}-${version}.orig"
    local stamp

    rm -rf "$CACHE_DIR"
    mkdir -p "$orig_dir"
    tar -xf "$tarball" -C "$orig_dir" --strip 1
    find "$orig_dir" -maxdepth 1 -mindepth 1 -exec cp -al '{}' . \;

    # Date the commit with the 'last modified' stamp that file(1) reports
    # for the tarball, if it reports one.
    stamp="$(file -L "$tarball" | sed -n "s/.*, last modified: \([^,]*\),*.*/\1/p")"

    git add .

    # 'git status -a' stopped being a dry-run commit in git 1.7.0 and is
    # rejected outright by current versions, so ask for the machine readable
    # status instead: any output means there is work to commit.
    if [ -n "$(git status --porcelain)" ]; then
	GIT_AUTHOR_DATE="$stamp" GIT_COMMITTER_DATE="$stamp" \
	    git commit -a -m "git-debimport ${PACKAGE_NAME}_${version}.orig.tar.gz"
    else
	echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${version}.orig.tar.gz"
    fi
}

# Upstream version of the orig.tar.gz that is currently in the import cache.
# Reset explicitly so that it can't be inherited from the environment.
LAST_UPSTREAM_VERSION=""

for f in $PACKAGE_DIFFS; do

    DEBIAN_VERSION="${f%.diff.gz}"
    DEBIAN_VERSION="${DEBIAN_VERSION##*_}"
    UPSTREAM_VERSION="${DEBIAN_VERSION%-*}"

    if [ -z "$LAST_UPSTREAM_VERSION" ]; then
	echo "Initial import of $PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.gz"

	# If this one has nothing to commit it probably should still crap
	# out, an empty initial orig doesn't bode well for things to come.
	import_orig_tarball "$UPSTREAM_VERSION"

	# The first commit is made on the initial branch, the upstream
	# branch is then forked off from it.
	git checkout -b "$UPSTREAM_BRANCH"
	git tag "$(sanitise_git_ref "$UPSTREAM_TAG_PREFIX$UPSTREAM_VERSION")"

	LAST_UPSTREAM_VERSION="$UPSTREAM_VERSION"

	git checkout "$DEBIAN_BRANCH"

    elif [ "$LAST_UPSTREAM_VERSION" != "$UPSTREAM_VERSION" ]; then
	echo "Importing $PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.gz"

	git checkout "$UPSTREAM_BRANCH"

	find . -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
	import_orig_tarball "$UPSTREAM_VERSION"

	git tag "$(sanitise_git_ref "$UPSTREAM_TAG_PREFIX$UPSTREAM_VERSION")"

	LAST_UPSTREAM_VERSION="$UPSTREAM_VERSION"

	git checkout "$DEBIAN_BRANCH"
	[ -n "$late_merge" ] || git merge "$UPSTREAM_BRANCH"

	#XXX If we were to just always merge here with -s ours, would that
	#    avoid the chance of merge conflicts but still keep the accurate
	#    history once we've fixed up the branch below?
	#    Try this when we find a set of packages that do have conflicts.
    fi

    echo "Importing $f"

    # Rebuild the work tree from the cached orig source plus this diff, so
    # that the commit is exactly what this package version unpacks to.
    find . -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
    find "$CACHE_DIR/${PACKAGE_NAME}-${UPSTREAM_VERSION}.orig" \
	 -maxdepth 1 -mindepth 1 -exec cp -al '{}' . \;
    zcat "$PKG_ROOT$f" | patch -p1
    chmod 755 debian/rules

    # Take the commit date and identity from the top changelog entry.
    # As before, a failure of dpkg-parsechangelog itself is not fatal.
    changelog="$(dpkg-parsechangelog || true)"
    maintainer="$(printf '%s\n' "$changelog" | sed -n 's/Maintainer: //p')"
    DATE="$(printf '%s\n' "$changelog" | sed -n 's/Date: //p')"
    AUTHOR="$(printf '%s\n' "$maintainer" | cut -d\< -f1)"
    EMAIL="<$(printf '%s\n' "$maintainer" | cut -d\< -f2)"

    git add .
    if [ -n "$(git status --porcelain)" ]; then
	GIT_AUTHOR_NAME="$AUTHOR" GIT_COMMITTER_NAME="$AUTHOR" \
	GIT_AUTHOR_EMAIL="$EMAIL" GIT_COMMITTER_EMAIL="$EMAIL" \
	GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
	    git commit -a -m "git-debimport ${PACKAGE_NAME}_${DEBIAN_VERSION}.diff.gz"
    else
	echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${DEBIAN_VERSION}.diff.gz"
    fi
    git tag "$(sanitise_git_ref "$DEBIAN_TAG_PREFIX$DEBIAN_VERSION")"

done

# The unpacked orig source is of no further use once every diff is imported.
rm -rf "$CACHE_DIR"

# With --late-merge no upstream release was merged along the way, so record
# the upstream branch as merged now while keeping the debian tree as it is.
if [ -n "$late_merge" ]; then
    git merge -s ours "$UPSTREAM_BRANCH"
fi

echo "All done!"

# vi:sts=4:sw=4:noet:foldmethod=marker
