#!/bin/bash
# Simple shell script for importing a collection of Debian source packages
# into a git repository.
#
# Copyright(C) 2007, 2008, Ron <ron@debian.org>
# This script is distributed according to the terms of the GNU GPL.

# Exit as soon as any unguarded command fails.
set -e

# Names of the branches that receive the upstream sources and the Debian
# packaging respectively.
UPSTREAM_BRANCH="upstream"
DEBIAN_BRANCH="master"

# Strings prepended to a version to form the tag name for upstream imports
# and for Debian revision imports.
UPSTREAM_TAG_PREFIX="v"
DEBIAN_TAG_PREFIX="v"

# We default to the linux kernel style tags, but people who prefer the
# git-buildpackage style and like slashes in their tags can do this:
#UPSTREAM_TAG_PREFIX="$UPSTREAM_BRANCH/"
#DEBIAN_TAG_PREFIX="debian/"


# Write the help text to stderr, then terminate the script.
#   $1 - exit status to terminate with
usage()
{
    local status=$1

    cat >&2 <<EOF

git-debimport [-v|--verbose] [--fetch] [--late-merge] path-prefix

 This program will create a git repository of all files that match
 \${path-prefix}_*.diff.gz or \${path-prefix}_*.debian.tar.{gz,bz2,xz}
 (with their corresponding orig.tar.{gz,bz2,xz}), or of all files that
 match \${path-prefix}_*.tar.{gz,bz2,xz} (for Debian native packages).
 If the --fetch option is supplied it will try to download all available
 versions from snapshot.debian.org rather than use an existing set of
 local packages.

  For example:
  $ mkdir mydestdir && cd mydestdir
  $ git-debimport ../mysrcdir/mypackagename

 If the --fetch option is used in the example above, then all versions
 of mypackagename will be downloaded into mysrcdir prior to creating a
 git repository from them.

 The --late-merge option if supplied will delay merging new upstream
 releases into the debian branch until all packages are imported.  This
 may be useful where merge conflicts would need to be manually resolved
 that we know will already be resolved by the next diff.gz imported.
 This will result in a poorer quality history in the repository, however
 every package imported will still be able to be extracted correctly from
 the tags that define it.  This option has no effect upon native package
 imports (as there will be only one branch in that case).

EOF

    exit $status
}

# This function replaces all consecutive illegal constructs in a git refname
# with a single '_'.  The rules for git refnames are described in the manual
# page for git-check-ref-format(1).
#
#   $1 - the candidate refname
#
# The sanitised name is printed on stdout.  The body runs in a subshell so
# that enabling extglob does not leak into the rest of the script.
sanitise_git_ref()
{
    (
	shopt -s extglob

	ref="${1//\/.//_}"				# rule 1: no "/." (component starting with a dot).
	# must have at least 1 '/'			# rule 2 is NFU.
	ref="${ref//../_}"				# rule 3: no "..".
	ref="${ref//[[:cntrl:][:space:]:~^?*[]/_}"	# rule 4: no control, space or : ~ ^ ? * [ characters.
	ref="${ref%%+(/|.)}"				# rule 5: drop any trailing run of '/' and '.'.
	ref="${ref/%.lock/.loc}"			# rule 6: must not end in ".lock".
	ref="${ref//@{/_}"				# rule 7: no "@{".
	# A pattern of '\\' matches ONE literal backslash, which is what the
	# rule forbids ('\\\\' would only catch two of them in a row).
	ref="${ref//\\/_}"				# rule 8: no backslash.
	# Collapse every run of two or more '_' into a single one.
	ref="${ref//+(_)_/_}"

	echo "$ref"
    )
}


# Walk the command line: record the option flags and the single package path
# prefix, bailing out through usage on anything unexpected.
for arg in "$@"; do
    case "$arg" in
	--fetch)
	    fetch_snapshots="yes"
	    ;;

	--late-merge)
	    late_merge="yes"
	    ;;

	--verbose|-v)
	    verbose="--verbose"
	    ;;

	-*)
	    # Anything else that looks like an option is an error.
	    echo "ERROR: Unrecognised option '$arg'"
	    usage 1
	    ;;

	*)
	    # The first non-option word is the path prefix; a second one is
	    # an error.
	    if [ -z "$package_path" ]; then
		package_path="$arg"
	    else
		echo "ERROR: Only one package path expected"
		usage 1
	    fi
	    ;;
    esac
done

# A path prefix is mandatory.
if [ -z "$package_path" ]; then
    usage 1
fi


# Split the path prefix into the directory holding the package files and the
# package name.  The argument is quoted so that a prefix containing blanks or
# glob characters reaches dirname/basename as a single word.
PACKAGES_DIR="$(dirname "$package_path")"
PACKAGE_NAME="$(basename "$package_path")"

# PKG_ROOT is prepended to package paths once the script has changed into the
# new $PACKAGE_NAME directory: a relative $PACKAGES_DIR then lives one level
# up, an absolute one needs no prefix.  It is reset first so that a value
# inherited from the environment cannot be picked up.
PKG_ROOT=""
case "$PACKAGES_DIR" in
    /*) ;;
    *)  PKG_ROOT="../" ;;
esac

# Refuse to import over something that already exists.
if [ -e "$PACKAGE_NAME" ]; then
    echo "A $PACKAGE_NAME dir already exists, please (re)move it first"
    exit 1
fi


# List, sorted, the native source tarballs of a package.
#
#   $1 - directory to search (recursively)
#   $2 - package name
#
# Prints every regular file named ${2}_*.tar.{gz,bz2,xz} that is neither an
# *.orig.tar.* nor a *.debian.tar.* file, one per line.  Errors from find are
# discarded, so a missing directory simply yields no output.
get_native_sources()
{
    # "$1" is quoted so that a directory name containing blanks or glob
    # characters is handed to find as one path.
    echo "$(find "$1" -type f \( -name "${2}_*.tar.gz"	\
			      -o -name "${2}_*.tar.bz2"	\
			      -o -name "${2}_*.tar.xz" \)	\
			      \! -name "*.orig.tar.gz"	\
			      \! -name "*.orig.tar.bz2"	\
			      \! -name "*.orig.tar.xz"	\
			      \! -name "*.debian.tar.gz"	\
			      \! -name "*.debian.tar.bz2"	\
			      \! -name "*.debian.tar.xz"	\
							2>/dev/null | sort)"
}

# List, sorted, the Debian diff / debian tarball files of a package.
#
#   $1 - directory to search (recursively)
#   $2 - package name
#
# Prints every regular file named ${2}_*.diff.gz or
# ${2}_*.debian.tar.{gz,bz2,xz}, one per line.  Errors from find are
# discarded, so a missing directory simply yields no output.
get_debian_sources()
{
    # The -name alternatives are grouped in parentheses: without them the
    # implicit "and" binds tighter than -o, so -type f would only apply to
    # the first pattern and directories could match the others.
    echo "$(find "$1" -type f \( -name "${2}_*.diff.gz"		\
			      -o -name "${2}_*.debian.tar.gz"	\
			      -o -name "${2}_*.debian.tar.bz2"	\
			      -o -name "${2}_*.debian.tar.xz" \)	\
							2>/dev/null | sort)"

}

# Look for package files that are already present locally.
PACKAGE_TARS="$(get_native_sources "$PACKAGES_DIR" "$PACKAGE_NAME")"
PACKAGE_DIFFS="$(get_debian_sources "$PACKAGES_DIR" "$PACKAGE_NAME")"


if [ -z "$PACKAGE_DIFFS" ] && [ -z "$PACKAGE_TARS" ]; then

    # Nothing local: that is only acceptable if we were asked to --fetch.
    if [ -z "$fetch_snapshots" ]; then
	echo "No ${package_path}_* diff.gz or tar.{gz,bz2,xz} files found, aborting."
	exit 1
    fi

    # 'command -v' is the shell builtin way to locate a program; '|| true'
    # keeps set -e from aborting before we can print our own message.
    DEBSNAP="$(command -v debsnap || true)"
    if [ -z "$DEBSNAP" ]; then
	echo "debsnap not found, unable to fetch files."
	exit 1
    fi

    # Start from a known value so that a 'ret' inherited from the environment
    # cannot be mistaken for a debsnap failure.
    ret=0
    # $verbose is deliberately unquoted: when empty it must vanish entirely.
    "$DEBSNAP" $verbose --destdir="$PACKAGES_DIR" "$PACKAGE_NAME" || ret=$?

    case "$ret" in
	0) ;;
	2)
	    echo "WARNING: some files failed to be fetched"
	    ;;
	*)
	    echo "ERROR: return code $ret from $DEBSNAP, aborting."
	    exit 1
	    ;;
    esac

    # Look again now that the download has run.
    PACKAGE_TARS="$(get_native_sources "$PACKAGES_DIR" "$PACKAGE_NAME")"
    PACKAGE_DIFFS="$(get_debian_sources "$PACKAGES_DIR" "$PACKAGE_NAME")"

    if [ -z "$PACKAGE_DIFFS" ] && [ -z "$PACKAGE_TARS" ]; then
	echo "No packages were able to be fetched, aborting."
	exit 1
    fi

elif [ -n "$fetch_snapshots" ]; then

    # Never mix freshly fetched snapshots with pre-existing files.
    echo "Package files already exist under $PACKAGES_DIR"
    echo "Please (re)move them first if you wish to --fetch snapshots."
    exit 1

fi

if [ -n "$PACKAGE_DIFFS" ] && [ -n "$PACKAGE_TARS" ]; then

    echo "A mix of native and non-native package exist in $PACKAGES_DIR"
    echo "That case isn't handled yet, sorry.  Patches welcome if you need it."
    exit 1

fi


# Scratch directory that tarballs get unpacked into.  The path is relative,
# so it is resolved against whatever the current directory is when used.
CACHE_DIR="../${PACKAGE_NAME}-import-cache"

# Print $1 with a trailing .tar.gz, .tar.bz2 or .tar.xz removed.  A name
# with none of those endings is printed unchanged.
strip_native_suffix()
{
    case "$1" in
	*.tar.gz)
	    echo "${1%.tar.gz}"
	    ;;

	*.tar.bz2)
	    echo "${1%.tar.bz2}"
	    ;;

	*.tar.xz)
	    echo "${1%.tar.xz}"
	    ;;

	*)
	    echo "$1"
	    ;;
    esac
}

# Print $1 with a trailing .diff.* or .debian.tar.* suffix removed, where the
# last part is one of gz, bz2 or xz.  Other names are printed unchanged.
#
# This accepts more than dpkg itself supports today (only diff.gz is), which
# is fine: we only unpack packages here, we never build them, so a diff.xz
# is as usable as anything else should somebody have one.
strip_debian_suffix()
{
    local kind comp suffix stripped

    for kind in diff debian.tar; do
	for comp in gz bz2 xz; do
	    suffix=".$kind.$comp"
	    if [[ $1 == *"$suffix" ]]; then
		stripped=${1%"$suffix"}
		echo "$stripped"
		return
	    fi
	done
    done

    echo "$1"
}

# Print the compression suffix (gz, bz2 or xz) that file name $1 ends in.
# Any other ending is reported on stderr and ends the current (sub)shell
# with status 1.
get_compression_type()
{
    local ext

    for ext in gz bz2 xz; do
	if [[ $1 == *."$ext" ]]; then
	    echo "$ext"
	    return
	fi
    done

    echo "Unsupported compression type for '$1'" >&2
    exit 1
}

# Print the compression suffix of the orig tarball that exists on disk.
#
#   $1 - path of the orig tarball without its compression suffix
#
# The candidates $1.gz, $1.bz2 and $1.xz are tried in that order and the
# suffix of the first one that exists is printed.  If none exists a message
# goes to stderr and the current (sub)shell is ended with status 1.
get_orig_type()
{
    local ext

    for ext in gz bz2 xz; do
	if [ -e "$1.$ext" ]; then
	    # Report the suffix that was actually found (an .xz orig must
	    # yield "xz", not "bz2").
	    echo "$ext"
	    return
	fi
    done

    echo "Unable to locate orig '$1.{gz,bz2,xz}'" >&2
    exit 1
}

# Succeed when native tarball $1 carries a strictly greater version than
# native tarball $2, as judged by dpkg.  The version is whatever follows the
# last '_' of the name once the .tar.* suffix has been stripped.
compare_tars()
{
    local lhs rhs

    lhs="$(strip_native_suffix $1)"
    rhs="$(strip_native_suffix $2)"

    dpkg --compare-versions "${lhs##*_}" gt "${rhs##*_}"
}

# Succeed when Debian diff/tarball $1 carries a strictly greater version than
# $2, as judged by dpkg.  The version is whatever follows the last '_' of the
# name once the .diff.* / .debian.tar.* suffix has been stripped.
compare_diffs()
{
    local lhs rhs

    lhs="$(strip_debian_suffix $1)"
    rhs="$(strip_debian_suffix $2)"

    dpkg --compare-versions "${lhs##*_}" gt "${rhs##*_}"
}

# If directory $1 holds exactly one (non-hidden) entry and that entry is a
# directory, print its path.  In every other case end the current (sub)shell
# with status 1 and print nothing.
single_subdir()
{
    local entries

    # An unmatched glob stays as the literal pattern, which then fails the
    # directory test below.
    entries=( $1/* )

    [ "${#entries[@]}" -eq 1 ]	|| exit 1
    [ -d "${entries[0]}" ]	|| exit 1

    echo "${entries[0]}"
}

# Throw away the whole cache directory, then extract a tarball into a fresh
# subdirectory of it.
#   $1 - tarball to extract
#   $2 - name of the subdirectory of $CACHE_DIR to extract into
unpack_tarball()
{
    local dest="$CACHE_DIR/$2"

    rm -rf "$CACHE_DIR"
    mkdir -p "$dest"
    tar -x -f "$1" -C "$dest"
}

# Hard-link copy (cp -al) the unpacked source tree from the cache into the
# current directory.
#   $1 - name of the subdirectory of $CACHE_DIR that was unpacked into
# When that subdirectory consists of one single directory, the contents of
# that inner directory are copied; otherwise its own top-level entries are.
copy_sourcedir()
{
    local src

    if ! src="$(single_subdir $CACHE_DIR/$1)"; then
	src="$CACHE_DIR/$1"
    fi

    find "$src" -maxdepth 1 -mindepth 1 -exec cp -al '{}' . \;
}

# Move the unpacked source tree out of the cache into the current directory.
#   $1 - name of the subdirectory of $CACHE_DIR that was unpacked into
# When that subdirectory consists of one single directory, the contents of
# that inner directory are moved; otherwise its own top-level entries are.
move_sourcedir()
{
    local src

    if ! src="$(single_subdir $CACHE_DIR/$1)"; then
	src="$CACHE_DIR/$1"
    fi

    find "$src" -maxdepth 1 -mindepth 1 -exec mv '{}' . \;
}

# Create the new repository in a directory named after the package and work
# from inside it from here on.
mkdir "$PACKAGE_NAME"
cd "$PACKAGE_NAME"
git init
# Point the still unborn HEAD at $DEBIAN_BRANCH explicitly.  The import later
# does 'git checkout "$DEBIAN_BRANCH"', which only works if the first commit
# landed on a branch of that name; git's default initial branch is
# configurable (init.defaultBranch) and so cannot be relied upon.
git symbolic-ref HEAD "refs/heads/$DEBIAN_BRANCH"


# Native packages: every tarball is a complete source tree, so each version
# simply replaces the working tree and is committed and tagged in turn.
if [ -n "$PACKAGE_TARS" ]; then

    # See below for details on sorting the package order ...
    P=( $PACKAGE_TARS )
    count=${#P[*]}

    for(( i=1; i < count; ++i )) do
	j=$i
	while (($j)) && compare_tars "${P[j-1]}" "${P[i]}"; do ((--j)) || true; done
	((i==j)) || P=( ${P[@]:0:j} ${P[i]} ${P[j]} ${P[@]:j+1:i-(j+1)} ${P[@]:i+1} )
    done

    PACKAGE_TARS="${P[@]}"

    for f in $PACKAGE_TARS; do

	# The version is whatever follows the last '_' once the .tar.*
	# suffix has been removed.
	PACKAGE_VERSION="$(strip_native_suffix "$f")"
	PACKAGE_VERSION="${PACKAGE_VERSION##*_}"
	COMP_TYPE="$(get_compression_type "$f")"

	echo "Importing $PACKAGES_DIR/${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.$COMP_TYPE"

	# Empty the working tree (keeping .git) and replace it with the
	# contents of this tarball.  The cache subdirectory is named after
	# PACKAGE_VERSION; UPSTREAM_VERSION is never set for native packages.
	find -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
	unpack_tarball "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.$COMP_TYPE" \
		       "${PACKAGE_NAME}-${PACKAGE_VERSION}.tmp"
	move_sourcedir "${PACKAGE_NAME}-${PACKAGE_VERSION}.tmp"

	# Shouldn't be needed here, but just in case ...
	chmod 755 debian/rules

	# Take the commit date and identity from the changelog: AUTHOR is the
	# Maintainer text before the '<', EMAIL is the rest with the '<' put
	# back in front.
	DATE=$(dpkg-parsechangelog | sed -n 's/Date: //p')
	AUTHOR=$(dpkg-parsechangelog | sed -n 's/Maintainer: //p' | cut -d\< -f1)
	EMAIL=\<$(dpkg-parsechangelog | sed -n 's/Maintainer: //p' | cut -d\< -f2)

	git add .
	# Only commit when the dry run says there is something to commit.
	if git --no-pager commit --dry-run -a > /dev/null 2>&1; then
	    GIT_AUTHOR_NAME="$AUTHOR" GIT_COMMITTER_NAME="$AUTHOR" \
	    GIT_AUTHOR_EMAIL="$EMAIL" GIT_COMMITTER_EMAIL="$EMAIL" \
	    GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
		git commit -a -m "git-debimport ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.$COMP_TYPE"
	else
	    echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.$COMP_TYPE"
	fi
	git tag "$(sanitise_git_ref "$DEBIAN_TAG_PREFIX$PACKAGE_VERSION")"

    done

    rm -rf "$CACHE_DIR"
    echo "All done!"
    exit 0
fi


# We really need the packages in the order dpkg thinks they are in here, and
# the only way to reliably know that is to ask dpkg what it thinks.  Since that
# is a rather expensive operation, and the number of operations to be performed
# grows rapidly as the number of packages to import gets longer, we must do the
# only sane thing feasible, and cheat.
#
# By doing a fast lexical pre-sort of the list, we can in almost all but the
# most pathological cases get the order almost, or even exactly, right.  So from
# that probable starting point, Simplicity, in her infinite wisdom, will reward
# all those people who numbered their packages sanely, with O(n) or near to it
# performance in determining the Proper order if we just do a trivial insertion
# sort for this next step.
#
# Pros:
# The Good People will get better results than the most fancy patent pending
# product of a college education algorithm is likely to do, and the Bad People
# will get the time-squared in purgatory that they deserve.
#
# Cons:
# Had you inferred all this from the 4 lines of code below?
#
# Todo:
# Maybe output a warning that this could take a while if $count > N.
# Determine N.

# Split the list of files into an array.  This relies on word splitting, so
# it only works while no path contains whitespace.
P=( $PACKAGE_DIFFS )
count=${#P[*]}

# Insertion sort of P into the order given by compare_diffs.
for(( i=1; i < count; ++i )) do
    j=$i
    #echo "was $i: ${P[i]}"
    # Step j left past every preceding entry that compares greater than
    # P[i].  The '|| true' absorbs the non-zero status that (( )) returns
    # when the decrement brings j to 0.
    while (($j)) && compare_diffs "${P[j-1]}" "${P[i]}"; do ((--j)) || true; done
    # If P[i] has to move, rebuild the array with it placed just before
    # P[j]; everything else keeps its relative order.
    ((i==j)) || P=( ${P[@]:0:j} ${P[i]} ${P[j]} ${P[@]:j+1:i-(j+1)} ${P[@]:i+1} )
done
#for(( i=1; i < count; ++i )) do echo "now $i: ${P[i]}"; done

# Join the sorted array back into one string for the loop that follows.
PACKAGE_DIFFS="${P[@]}"

# Import every Debian revision in sorted order.  For each file: make sure the
# orig tarball of its upstream version has been committed and tagged on
# $UPSTREAM_BRANCH, then rebuild the tree on $DEBIAN_BRANCH from the cached
# upstream source plus the Debian changes, and commit and tag that.
for f in $PACKAGE_DIFFS; do

    # Remember the file's suffix; it is only used in the commit messages.
    case "$f" in
	*.diff.gz)
	    DIFF_TYPE="diff.gz"
	    ;;

	*.debian.tar.gz | *.debian.tar.bz2 | *.debian.tar.xz)
	    DIFF_TYPE="debian.tar.$(get_compression_type $f)"
	    ;;
    esac

    # DEBIAN_VERSION is what follows the last '_' once the suffix is gone;
    # UPSTREAM_VERSION is that with everything from the last '-' removed.
    DEBIAN_VERSION="$(strip_debian_suffix $f)"
    DEBIAN_VERSION="${DEBIAN_VERSION##*_}"
    UPSTREAM_VERSION="${DEBIAN_VERSION%-*}"

    # LAST_UPSTREAM_VERSION is only empty until the first orig tarball has
    # been imported, so this branch runs once, for the very first file.
    if [ -z "$LAST_UPSTREAM_VERSION" ]; then
	ORIG_TYPE="$(get_orig_type "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar")"

	echo "Initial import of $PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"

	unpack_tarball "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE" \
		       "${PACKAGE_NAME}-${UPSTREAM_VERSION}.tmp"
	copy_sourcedir "${PACKAGE_NAME}-${UPSTREAM_VERSION}.tmp"

	# Use the "last modified" field that file(1) reports for the tarball
	# as the commit date.  DATE ends up empty when file prints no such
	# field, because sed -n only prints on a match.
	DATE="$(file -L $PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE	\
		| sed -n "s/.*, last modified: \([^,]*\),*.*/\1/p")"

	git add .
	# Only commit when the dry run says there is something to commit.
	if git --no-pager commit --dry-run -a > /dev/null 2>&1; then
	    GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
		git commit -a -m "git-debimport ${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"
	else
	    # This particular case probably should still crap out, if the
	    # initial orig is empty that doesn't bode well for things to come
	    echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"
	fi
	# The first commit was made on the branch we started on; create
	# $UPSTREAM_BRANCH from it and tag the upstream version there.
	git checkout -b "$UPSTREAM_BRANCH"
	git tag $(sanitise_git_ref "$UPSTREAM_TAG_PREFIX$UPSTREAM_VERSION")

	LAST_UPSTREAM_VERSION="$UPSTREAM_VERSION"

	git checkout "$DEBIAN_BRANCH"
    fi

    # A new upstream version: import its orig tarball on $UPSTREAM_BRANCH
    # before dealing with the Debian changes.
    if [ "$LAST_UPSTREAM_VERSION" != "$UPSTREAM_VERSION" ]; then
	ORIG_TYPE="$(get_orig_type "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar")"

	echo "Importing $PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"

	git checkout "$UPSTREAM_BRANCH"

	# Empty the working tree (keeping .git) and fill it from the tarball.
	find -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
	unpack_tarball "$PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE" \
		       "${PACKAGE_NAME}-${UPSTREAM_VERSION}.tmp"
	copy_sourcedir "${PACKAGE_NAME}-${UPSTREAM_VERSION}.tmp"

	# Commit date from file(1)'s "last modified" field, as above.
	DATE="$(file -L $PKG_ROOT$PACKAGES_DIR/${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE	\
		| sed -n "s/.*, last modified: \([^,]*\),*.*/\1/p")"

	git add .
	if git --no-pager commit --dry-run -a > /dev/null 2>&1; then
	    GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
		git commit -a -m "git-debimport ${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"
	else
	    echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${UPSTREAM_VERSION}.orig.tar.$ORIG_TYPE"
	fi
	git tag $(sanitise_git_ref "$UPSTREAM_TAG_PREFIX$UPSTREAM_VERSION")

	LAST_UPSTREAM_VERSION="$UPSTREAM_VERSION"

	# Back to the Debian branch; merge the new upstream in right away
	# unless --late-merge asked for that to be postponed.
	git checkout "$DEBIAN_BRANCH"
	[ -n "$late_merge" ] || git merge "$UPSTREAM_BRANCH"

	#XXX If we were to just always merge here with -s ours, would that
	#    avoid the chance of merge conflicts but still keep the accurate
	#    history once we've fixed up the branch below?
	#    Try this when we find a set of packages that do have conflicts.
    fi

    echo "Importing $f"

    # Rebuild the working tree from scratch: the upstream source that is
    # still in the cache, with the Debian changes applied on top of it.
    find -maxdepth 1 -mindepth 1 \! -name ".git" -exec rm -rf '{}' +
    copy_sourcedir "${PACKAGE_NAME}-${UPSTREAM_VERSION}.tmp"

    case "$f" in
	*.diff.gz)
	    zcat "$PKG_ROOT$f" | patch -p1
	    ;;

	*.debian.tar.gz | *.debian.tar.bz2 | *.debian.tar.xz)
	    tar -xf "$PKG_ROOT$f"
	    ;;
    esac
    chmod 755 debian/rules

    # Take the commit date and identity from the changelog: AUTHOR is the
    # Maintainer text before the '<', EMAIL is the rest with the '<' put
    # back in front.
    DATE=$(dpkg-parsechangelog | sed -n 's/Date: //p')
    AUTHOR=$(dpkg-parsechangelog | sed -n 's/Maintainer: //p' | cut -d\< -f1)
    EMAIL=\<$(dpkg-parsechangelog | sed -n 's/Maintainer: //p' | cut -d\< -f2)

    git add .
    if git --no-pager commit --dry-run -a > /dev/null 2>&1; then
	GIT_AUTHOR_NAME="$AUTHOR" GIT_COMMITTER_NAME="$AUTHOR" \
	GIT_AUTHOR_EMAIL="$EMAIL" GIT_COMMITTER_EMAIL="$EMAIL" \
	GIT_AUTHOR_DATE="$DATE" GIT_COMMITTER_DATE="$DATE" \
	    git commit -a -m "git-debimport ${PACKAGE_NAME}_${DEBIAN_VERSION}.$DIFF_TYPE"
    else
	echo "WARNING: nothing to commit for ${PACKAGE_NAME}_${DEBIAN_VERSION}.$DIFF_TYPE"
    fi
    # Tag the Debian revision (whether or not a commit was made for it).
    git tag $(sanitise_git_ref "$DEBIAN_TAG_PREFIX$DEBIAN_VERSION")

done

# Clean up the unpack cache.  With --late-merge the upstream branch has not
# been merged along the way, so record one merge now using the 'ours'
# strategy.
rm -rf "$CACHE_DIR"
if [ -n "$late_merge" ]; then
    git merge -s ours "$UPSTREAM_BRANCH"
fi

echo "All done!"

# vi:sts=4:sw=4:noet:foldmethod=marker
