#!/bin/bash

# git debcherry upstream [head]

# Emit a profiling timestamp on stderr; silent unless debug is 'yes'.
# Arguments: $1 - label for the call site (callers pass $LINENO)
# Globals:   debug (read)
function _time()  {
    [ "$debug" = 'yes' ] || return 0

    local now
    now=$(date "+%s.%N")
    echo "# time=" $now " line=" $1 >&2
}

# Print the command-line synopsis and the option summary on stdout.
# Globals: none ($0 supplies the program name)
function usage() {
    # The program name is passed as an argument rather than spliced into
    # the format, so a '%' or backslash in $0 is printed literally.
    printf 'usage: %s [options] upstream [head]\n' "$0"
    printf '\t -d | --debug\t\tprint debugging/profiling info\n'
    printf '\t -s | --stat\t\toutput patch statistics to stdout\n'
    printf '\t (-o | --output-directory) <dir> \toutput patches to <dir>\n'
}

# Print an identifier for the change introduced by a commit.
# Arguments: $1 - commit
# Outputs:   the literal word "empty" when `git diff-tree` prints nothing
#            for the commit; otherwise the output of `git patch-id` with
#            its last space-separated field removed.
function patch_id() {
    local commit=$1
    local raw

    if [ -n "$(git diff-tree $commit)" ]; then
        raw=$(git show $commit | git patch-id)
        # drop the trailing " <commit id>" that patch-id appends
        echo ${raw% *}
        return
    fi

    echo "empty"
}

# printf-style diagnostics on stderr; silent unless debug is 'yes'.
# Arguments: $1 - printf format, remaining arguments - its operands
# Globals:   debug (read)
function debug (){
    [ "$debug" = 'yes' ] || return 0
    printf "$@" >&2
}
# Remove the scratch directory, but only when debug is exactly 'no';
# for any other value the directory is left in place.
# Globals: debug (read), tmpdir (read)
function _cleanup (){
    case "$debug" in
        no)
            rm -rf "$tmpdir"
            ;;
    esac
}

# Force-checkout a ref, capturing git's output in the log file.
# Arguments: $1 - ref or commit to check out
# Globals:   log (read; overwritten with git's stdout and stderr)
# Exits via _die when the checkout fails.
function _checkout (){
    local ref=$1
    # Both expansions are quoted: an unquoted redirection target that
    # contains whitespace is an "ambiguous redirect" error in bash.
    git checkout -f "$ref" >"$log" 2>&1 || _die "checkout failed"
}

# Trial-apply a commit on top of a ref and check whether it stages changes.
# Arguments: $1 - commit to cherry-pick
#            $2 - ref to check out before the attempt
#            $3 - expected exit status of `git diff --cached --quiet`
#                 (0 = nothing staged, 1 = something staged)
# Globals:   log (written)
# Returns:   0 when the cherry-pick succeeded and the staged state matches
#            $3, 1 otherwise. The previously checked-out commit is restored
#            before returning.
function test_apply (){
    local hash=$1
    local ref=$2
    local want_changes=$3
    local ret=0
    # declared local so the helper does not leak state into the caller
    local old_head has_changes

    debug "test_apply %s %s %s\n" "$hash" "$ref" "$want_changes"
    old_head=$(git rev-parse HEAD)

    _checkout "$ref"

    if ! git cherry-pick --no-commit "$hash" 1>/dev/null 2>&1 ; then
        debug "cherry-pick failed in test_apply\n"
        ret=1
    fi

    # --quiet makes the exit status report whether the index differs
    git diff --cached --quiet
    has_changes=$?

    if [ "$has_changes" != "$want_changes" ]; then
        ret=1
    fi

    debug "test_apply %d %d %d\n" "$has_changes" "$want_changes" "$ret"

    # wipe out any changes
    git reset --hard HEAD >"$log" 2>&1 || _die "reset failed"

    _checkout "$old_head"

    return $ret
}

# Make a temporary branch with a unique name.
# usage: tmpbranch prefix treeish
# Globals:   GIT_DIR (read; when unset the git dir is asked from git)
# Outputs:   the new ref name relative to the git dir
#            (refs/heads/<prefix>_XXXXXXXXXX) on stdout
# Returns:   non-zero when the git dir cannot be determined, the ref file
#            cannot be created, or the treeish does not resolve
function tmpbranch() {
    local git_dir filtered

    if [ -n "${GIT_DIR+set}" ]; then
        git_dir=$GIT_DIR
    else
        git_dir=$(git rev-parse --git-dir) || return 1
        # only a relative answer needs anchoring at the current directory
        case $git_dir in
            /*) ;;
            *) git_dir=$(pwd)/$git_dir ;;
        esac
    fi

    # mktemp both picks the unique name and creates the ref file
    filtered=$(mktemp "$git_dir/refs/heads/${1}_XXXXXXXXXX") || return 1

    # git update-ref is too fussy here.
    if ! git rev-parse "$2" > "$filtered"; then
        rm -f "$filtered"
        return 1
    fi

    # quoted so the directory is stripped literally, not as a glob pattern
    printf '%s\n' "${filtered#"$git_dir"/}"
}

# Abort the script with an error message.
# Arguments: $1 - message, printed verbatim on stderr
# Globals:   log (read) - when it names a non-empty file, its contents are
#            printed on stdout after the message
# Exits with status 1.
function _die() {
    # printf with a quoted operand keeps whitespace and glob characters
    # in the message intact
    printf '%s\n' "$1" >&2
    # Quoted and defaulted: an unquoted empty $log makes `[ -s ]` true and
    # a bare `cat` would then block reading stdin.
    if [ -s "${log-}" ]; then
        cat "$log"
    fi
    exit 1
}

# Scratch directory: holds the temporary clone, the command log and the
# intermediate message/diff/patch-list files.
tmpdir=$(mktemp --tmpdir -d git-debcherry.XXXXXX) || {
    echo "cannot create temporary directory" >&2
    exit 1
}

# Output of the most recent git command; _die prints it on failure.
log=$tmpdir/log
touch "$log"

# Option defaults, overridden by the command-line parser.
stat_only="no"
patch_dir=""
debug="no"

# Installed as soon as the directory exists so that early exits (--help,
# usage errors, bad refs) do not leave it behind. _cleanup consults
# $debug at exit time, so --debug still preserves the directory.
trap '_cleanup' EXIT
# Parse leading options. Stops at `--` or at the first word that is not
# an option, leaving the positional arguments (upstream [head]) in "$@".
while : ; do
    case $1 in
        -h | --help)
            usage
            exit 0
            ;;
        -d | --debug)
            debug="yes"
            shift
            ;;
        -s | --stat)
            stat_only="yes"
            shift
            ;;
        -o | --output-directory)
            # say what is wrong instead of silently falling through to
            # the generic usage text
            if [ $# -lt 2 ]; then
                echo "Option $1 requires an argument" >&2
                usage
                exit 1
            fi
            patch_dir=$2
            # A relative directory is anchored at the invocation directory,
            # since the script changes directory later on.
            case $patch_dir in
                /*)
                    ;;
                *)
                    patch_dir=$(pwd)/$patch_dir
                    ;;
            esac
            shift 2
            ;;
        --)
            shift
            break
            ;;
        -*)
            echo "Unknown option $1" >&2
            usage
            exit 1
            ;;
        *)
            break
            ;;
    esac
done

# The upstream ref is mandatory.
if [ $# -lt 1 ]; then
    usage
    exit 1
fi

_time $LINENO

# Resolve the upstream ref to a commit id; git's complaint goes to the
# log so that _die can show it.
upstream=$1
if ! upstream_sha1=$(git rev-parse "$upstream" 2>"$log"); then
    _die "bad or missing ref: $upstream"
fi

# head defaults to whatever HEAD symbolically points at; a leading
# refs/heads/ is stripped to leave the short branch name.
head=${2-$(git symbolic-ref HEAD)}
head=${head##refs/heads/}
if ! head_sha1=$(git rev-parse "$head" 2>"$log"); then
    _die "bad or missing ref: $head"
fi

# Refuse to export into a path that already exists (patch mode only).
if [ "$stat_only" = "no" ] && [ -n "$patch_dir" ] && [ -e "$patch_dir" ]; then
    _die "$patch_dir exists, not overwriting"
fi

# Maps patch-id -> commit for commits of head not (yet) matched upstream.
declare -A unmerged

# Installed before the clone so that a failed clone does not leave the
# scratch directory behind.
trap '_cleanup' EXIT

# Locate the repository we were started in. `git rev-parse --git-dir` is
# only documented to be relative to the current directory "when relative",
# so $(pwd) is prefixed only when the answer is not already absolute.
if [ -n "${GIT_DIR+set}" ]; then
    orig_git_dir=$GIT_DIR
else
    orig_git_dir=$(git rev-parse --git-dir) || _die "cannot locate git directory"
    case $orig_git_dir in
        /*) ;;
        *) orig_git_dir=$(pwd)/$orig_git_dir ;;
    esac
fi

# All further work happens in a throw-away clone; every later git command
# is pointed at it through the environment.
git clone "$orig_git_dir" "$tmpdir"/clone 1>"$log" || _die "clone failed"
export GIT_WORK_TREE=$tmpdir/clone
export GIT_DIR=$tmpdir/clone/.git

tmp_upstream=$(tmpbranch upstream "$upstream_sha1")
tmp_head=$(tmpbranch head "$head_sha1")

_checkout "$tmp_head"

# Remove any traces of .pc (from quilt) and ./debian on temporary
# branches. This avoids conflicts when dpkg-source tries to apply
# quilt patches. --prune-empty means we are only dealing with commits
# that do somehow touch upstream.

_time $LINENO

{
# git-filter-branch doesn't really understand GIT_WORK_TREE
# The cd is checked: filter-branch -f must not run from the wrong directory.
cd "$GIT_WORK_TREE" || _die "cannot enter $GIT_WORK_TREE"
if ! git filter-branch -f --prune-empty --index-filter \
        'git rm --ignore-unmatch --cached -r .pc debian' \
        "$tmp_upstream".."$tmp_head" 1>"$log" 2>&1 ; then
    _die "filtering failed"
fi
}

_time $LINENO

# for every commit reachable from head, but not from
# upstream compute its patch-id (essentially sha1 of diff)
# and save a map back to the commit.

while read -r hash ; do
    ident=$(patch_id "$hash")
    # an empty id is not a valid associative-array subscript
    [ -n "$ident" ] || continue
    unmerged[$ident]=$hash
done < <(git rev-list --no-merges "$tmp_upstream".."$tmp_head")

_time $LINENO

# now delete any found upstream; note that this only gets exact
# matches, so partial application is not caught here

while read -r ident _ ; do
    [ -n "$ident" ] || continue
    # quoted so the subscript expression is never treated as a glob
    unset "unmerged[$ident]"
done < <(git log --patch --no-merges "$tmp_upstream" | git patch-id)

_time $LINENO

# Commit message of the fixup patch; the loop below appends one entry for
# every commit that could not be exported as a patch of its own.
printf 'debcherry fixup patch\n\n' >> "$tmpdir/message"

debug "Starting test reverts at %s\n" "$(git rev-parse HEAD)"

initial_upstream=$(git rev-parse "$tmp_upstream")

# Walk the unmerged commits; each one that passes the checks is reverted
# on the current branch and recorded in patch-list.
while read -r hash; do
    ident=$(patch_id "$hash")
    [ -n "$ident" ] || continue
    [ -n "${unmerged[$ident]}" ] || continue

    if [ -z "$(git format-patch --no-binary --stdout -1 "$hash" | lsdiff)" ]; then
        shorthash=$(git rev-parse --short "$hash")
        message=$(git log --pretty=format:"%s" -1 "$hash")
        # the subject is an operand, never part of the format: commit
        # subjects may contain '%' or backslashes
        printf 'skipping commit  %s <%s>; empty or binary only.\n   %s\n\n' \
            "$shorthash" "$ident" "$message" >&2
        continue
    fi

    # the patch should cherry pick against head, but produce no
    # changes

    if ! test_apply "$hash" HEAD 0 ; then
        git --no-pager log --oneline -1 "$hash" >> "$tmpdir/message"
        printf "\t - extra changes or conflicts\n" >> "$tmpdir/message"
        continue
    fi

    # the patch should apply to upstream and produce some changes.
    # XXX: note that this is a bit heuristic (i.e. wrong). It should
    # really check against something like upstream with all of the
    # patches so far applied.

    if ! test_apply "$hash" "$initial_upstream" 1 ; then
        git --no-pager log --oneline -1 "$hash" >> "$tmpdir/message"
        printf "\t - no changes against upstream or conflicts\n" >> "$tmpdir/message"
        continue
    fi

    if git revert --no-edit "$hash" 1>/dev/null 2>&1; then
        echo "$hash" >> "$tmpdir/patch-list"
    else
        git revert --abort
        git --no-pager log --oneline -1 "$hash" >> "$tmpdir/message"
        # newline-terminated so the next entry starts on its own line
        printf "\t - conflict\n" >> "$tmpdir/message"
    fi
done < <(git rev-list --no-merges --topo-order "$tmp_upstream".."$tmp_head")

# this is where we want to build our patch series
SAVED_HEAD=$(git rev-parse HEAD)

_checkout "$(git rev-parse "$tmp_upstream")"

# group file deletions
# The list is read NUL-delimited into an array so that file names
# containing whitespace survive intact.
DELETED_FILES=()
while IFS= read -r -d '' deleted_file; do
    DELETED_FILES+=("$deleted_file")
done < <(git diff --diff-filter=D --name-only -z "$tmp_upstream" "$tmp_head")
if [ "${#DELETED_FILES[@]}" -gt 0 ]; then
    # stdout carries the script's patch/stat output, so git's progress
    # chatter is sent to stderr instead
    git rm -- "${DELETED_FILES[@]}" >&2
    git commit -m'File deletions' >&2
fi

# HEAD is now like upstream, but with files deleted

base=$(git rev-parse HEAD)

# Everything that separates base from the reverted head, minus debian/,
# becomes the single fixup patch.
git diff "$base" "$SAVED_HEAD" | filterdiff -x '[ab]/debian/*' --clean >> "$tmpdir/diff"

if [ -s "$tmpdir/diff" ]; then
    git apply --whitespace=nowarn --index "$tmpdir/diff" || \
        _die "fatal: apply failed"
    git commit -F "$tmpdir/message" 1>/dev/null 2>&1 || \
        _die "fatal: fixup commit failed"
fi

# Re-apply the recorded commits on top of the fixup commit. patch-list was
# appended to in revert order, so it is read backwards with tac.
if [ -s "$tmpdir/patch-list" ]; then
    while read -r hash ; do
        debug "trying %s\n" "$hash"

        if ! git cherry-pick --no-edit "$hash" 1>/dev/null 2>&1; then
            # a failed pick that staged nothing is skipped, anything
            # else is fatal
            if [ -z "$(git diff --cached)" ]; then
                echo "skipping $hash; empty cherry-pick" >&2
                continue
            fi
            _die "cherry-pick $hash failed"
        fi
        # When the tree is identical to base again, move base forward;
        # the export below only covers base..HEAD.
        if [ -z "$(git diff "$base")" ]; then
            base=$(git rev-parse HEAD)
            debug "new base %s\n" "$base"
        fi

    done < <(tac "$tmpdir/patch-list")
fi

# Emit the result: statistics, a patch directory with a series file, or
# the patches themselves on stdout.
if [ "$stat_only" = "yes" ]; then
    git log --reverse --oneline --stat "$base"..HEAD
else
    if [ -n "$patch_dir" ]; then
        mkdir -p "$patch_dir" || _die "mkdir failed"
        echo "# exported from git by git-debcherry" > "$patch_dir/series"
        if PATCHES=$(git format-patch -o "$patch_dir" "$base"..HEAD); then
            if [ -n "$PATCHES" ]; then
                # format-patch prints one created file per line; the series
                # file wants names relative to the patch directory. The
                # directory part is stripped with a parameter expansion, so
                # no path is ever interpreted as a regular expression.
                while IFS= read -r patch_file; do
                    printf '%s\n' "${patch_file##*/}"
                done <<< "$PATCHES" >> "$patch_dir/series"
            else
                echo "Warning: no patches exported"
            fi
        else
            _die "git format-patch failed"
        fi
    else
        git format-patch --stdout "$base"..HEAD
    fi
fi

_time $LINENO
