#!/bin/sh -efu
#
# Copyright (C) 2006-2009  Dmitry V. Levin <ldv@altlinux.org>
# Copyright (C) 2006  Alexey Gladkov <legion@altlinux.org>
#
# Incrementally import source packages to git repository.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

. gear-sh-functions

# Print the program name, version and copyright notice to stdout, then
# terminate the script.
# Reads PROG and PROG_VERSION, which are not defined in this file
# (presumably provided by gear-sh-functions).
print_version()
{
	# Only the first line needs interpolation; the rest is static text.
	printf '%s version %s\n' "$PROG" "$PROG_VERSION"
	cat <<'EOF'
Written by Dmitry V. Levin <ldv@altlinux.org>

Copyright (C) 2006-2009  Dmitry V. Levin <ldv@altlinux.org>
Copyright (C) 2006  Alexey Gladkov <legion@altlinux.org>
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOF
	exit
}

# Print the usage summary and option list to stdout, then terminate the
# script.
# Reads PROG, which is not defined in this file (presumably provided by
# gear-sh-functions).
show_help()
{
	# Lines that mention the program name are formatted individually ...
	printf '%s - incrementally import source packages to git repository.\n\n' "$PROG"
	printf 'Usage: %s [options] <source-package>...\n' "$PROG"
	printf 'or:    %s [options] --stdin\n' "$PROG"
	# ... the remaining text is static and emitted verbatim.
	cat <<'EOF'

Options:
  --branch=BRANCH           branch name where to import (default is "srpms");
  --import-only             do not perform a checkout after importing;
  --no-unpack               do not unpack source archives;
  --rules=FILENAME          gear rules filename (default is ".gear/rules");
  --spec-pattern=PATTERN    specfile pattern list (default is "*.spec");
  --stdin                   read source package names from stdin;
  -q, --quiet               try to be more quiet;
  -v, --verbose             print a message for each action;
  -V, --version             print program version and exit;
  -h, --help                show this text and exit.

Report bugs to http://bugzilla.altlinux.org/

EOF
	exit
}

tmpdir=
# Undo the temporary repository state set up for the import.
#
# Does nothing while tmpdir is empty.  Otherwise it changes back to
# saved_cwd, restores (or removes) the index file unless
# index_already_recovered is set, deletes tmpdir, and points HEAD back
# at branch_orig when that differs from the import branch.
cleanup_handler()
{
	if [ -z "$tmpdir" ]; then
		return 0
	fi
	cd "$saved_cwd"

	# Recover index file: copy the backup over it; when there is no
	# backup, or the copy fails, remove the index file instead.
	if [ -z "$index_already_recovered" ]; then
		if [ -f "$tmpdir/index" ] && cp -p "$tmpdir/index" "$index_orig"; then
			:
		else
			rm -f "$index_orig" ||:
		fi
	fi
	rm -rf -- "$tmpdir"

	# Recover HEAD.
	if [ "$branch_orig" != "refs/heads/$branch_import" ]; then
		git symbolic-ref HEAD "$branch_orig"
	fi
}

# Heuristically shorten tar directory name.
#
# Arguments:
#   $1 - directory (or archive base) name.
#
# Strips trailing "-<letters><digits>[<dot/letter run><dot/alnum run>]"
# groups (version-like suffixes such as "-1.2.3" or "-r5") from the name
# and prints the result without a trailing newline.  The original name is
# printed unchanged when stripping leaves nothing, changes nothing, or
# yields a name that already exists in the current directory.
shorten_dir_name()
{
	local name="$1"; shift
	local short
	short="$(printf %s "$name" |
		 LC_COLLATE=C sed 's/\(-[[:alpha:]]*[0-9]\+\([.[:alpha:]]\+[.[:alnum:]]*\)\?\)\+$//')"
	# Use separate tests rather than "-a": with more than four operands
	# the result of "[" is unspecified, and names that look like operators
	# (e.g. "=") make it fail with a syntax error.
	if [ -n "$short" ] && [ "$short" != "$name" ] &&
	   [ ! -e "$short" ]; then
		name="$short"
	fi

	printf %s "$name"
}

# Collapse per-file rules in a gear rules file into "*.suffix" wildcards.
#
# Arguments:
#   $1 - rules file name; nothing is done if it is missing or empty.
#
# For every file suffix seen in the rules, the single-file rules ending
# in that suffix are replaced by one "<method>: *.<suffix>" rule when they
# all share one method out of copy, gzip, bzip2 or xz; otherwise they are
# kept as they are.  Rules with more than one word after the method, and
# rules whose file name has no dot, are appended unchanged.  The rewritten
# file replaces the original and is added to the git index.
optimize_rules()
{
	local rules
	rules="$1" && shift
	[ -s "$rules" ] || return 0

	# Keep the scratch file name out of the caller's namespace.
	local tmp
	tmp="$(mktemp rules.XXXXXX)"
	sed -n 's/^[^:]\+: [^ ]*\.\([^ .]\+\).*/\1/p' "$rules" |
		LC_COLLATE=C sort -u |
		# -r: a suffix may contain backslashes; without it they would be
		# stripped and the matching rules silently lost.
		while read -r suffix; do
			local quoted method
			quoted="$(quote_sed_regexp "$suffix")"
			# Distinct methods used by single-file rules with this suffix.
			method="$(sed -n 's/^\([^:]\+\): [^ ]*\.'"$quoted"'$/\1/p' "$rules" |LC_COLLATE=C sort -u)"
			case "$method" in
				copy|gzip|bzip2|xz)
					printf '%s: *.%s\n' "$method" "$suffix"
					;;
				*)
					# Mixed or other methods: keep the rules verbatim.
					sed -n 's/^[^:]\+: [^ ]*\.'"$quoted"'$/&/p' "$rules"
					;;
			esac
		done >"$tmp"
	# Rules with arguments and rules for dot-less names are kept as is.
	sed -n 's/^[^:]\+: \([^ ]\+ .*\|[^ .]\+$\)/&/p' "$rules" >>"$tmp"
	mv "$tmp" "$rules"
	git update-index --add ${verbose:+--verbose} -- "$rules"
}

# Append a "<method>: <files...>" line to the gear rules file and add the
# listed files to the git index.
#
# Arguments:
#   $1  - rule method written in front of the colon;
#   $2… - file names; they are joined into one rule line.
write_copy_rule()
{
	local method
	method="$1"
	shift

	local rule_args="$*"
	printf '%s: %s\n' "$method" "$rule_args" >>"$gear_rules"
	git update-index --add ${verbose:+--verbose} -- "$@"
}

# Print the toplevel non-directory members of a tar archive.
#
# Arguments:
#   $1 - archive file name.
#
# Works on the verbose listing: a line is selected when its type
# character is not "d" and the text after the first five
# whitespace-separated columns contains no slash.
list_tar_toplevel_nondir()
{
	local archive
	archive="$1"
	shift

	local filter='s/^[^d]\([^[:space:]]\+[[:space:]]\+\)\{5\}\([^/]\+\)$/\2/p'
	tar -tvf "$archive" |
		sed -n "$filter"
}

# Print the toplevel non-directory members of a zip archive.
#
# Arguments:
#   $1 - archive file name.
#
# Works on the zipinfo listing requested with "--h-t": a line is selected
# when its type character is not "d" and the text after the first eight
# whitespace-separated columns contains no slash.
list_zip_toplevel_nondir()
{
	local archive
	archive="$1"
	shift

	local filter='s/^[^d]\([^[:space:]]\+[[:space:]]\+\)\{8\}\([^/]\+\)$/\2/p'
	zipinfo --h-t -- "$archive" |
		sed -n "$filter"
}

spec_version=
# Import an archive as an unpacked directory tree and record the matching
# gear rule.
#
# Arguments:
#   $1 - archive kind, "tar" or "zip" (anything else is fatal);
#   $2 - compression method of the archive (bzip2, gzip, xz; any other
#        value means a plain tar); also passed to the import_file fallback;
#   $3 - archive file name, relative to the current directory.
#
# Variables read: no_unpack, tmpdir, spec_version, gear_rules, verbose.
#
# The archive is stored as a plain file via "import_file 1 $2 $3" when
# unpacking is disabled, the listing fails or is empty, a member path
# contains a ".." component, or extraction fails.
# Otherwise the archive file is removed, its contents end up in a
# directory whose name is chosen to avoid clashes with existing files,
# a rule line is appended to $gear_rules, and the new files are added to
# the git index.
import_tree()
{
	local method_tree="$1"; shift
	local method_copy="$1"; shift
	local f="$1"; shift

	if [ -n "$no_unpack" ]; then
		# If archive unpacking is disabled, then store it as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	# Define archive list command and related parameters.
	# rule_suffix is only set when the file name suffix differs from
	# the one implied by method_pack.
	local source_ls_cmd source_ls_toplevel_nondir_cmd
	local arch_basename method_pack rule_suffix=
	case "$method_tree" in
		tar)
			source_ls_cmd='tar -tf'
			source_ls_toplevel_nondir_cmd=list_tar_toplevel_nondir
			case "$method_copy" in
				bzip2) method_pack=tar.bz2 ;;
				gzip) method_pack=tar.gz ;;
				xz) method_pack=tar.xz ;;
				*) method_pack=tar ;;
			esac
			case "$f" in
				*.tar)
					arch_basename="${f%.tar}"
					[ "$method_pack" = tar ] ||
						rule_suffix=' suffix=.tar'
					;;
				*.tar.bz2)
					arch_basename="${f%.tar.bz2}"
					[ "$method_pack" = tar.bz2 ] ||
						rule_suffix=' suffix=.tar.bz2'
					;;
				*.tar.gz)
					arch_basename="${f%.tar.gz}"
					[ "$method_pack" = tar.gz ] ||
						rule_suffix=' suffix=.tar.gz'
					;;
				*.tar.xz)
					arch_basename="${f%.tar.xz}"
					[ "$method_pack" = tar.xz ] ||
						rule_suffix=' suffix=.tar.xz'
					;;
				*.tbz)
					arch_basename="${f%.tbz}"
					rule_suffix=' suffix=.tbz'
					;;
				*.tbz2)
					arch_basename="${f%.tbz2}"
					rule_suffix=' suffix=.tbz2'
					;;
				*.tgz)
					arch_basename="${f%.tgz}"
					rule_suffix=' suffix=.tgz'
					;;
				*.txz)
					arch_basename="${f%.txz}"
					rule_suffix=' suffix=.txz'
					;;
				*)
					arch_basename="$f"
					rule_suffix=' suffix='
					;;
			esac
			;;
		zip)
			source_ls_cmd='zipinfo -1 --'
			source_ls_toplevel_nondir_cmd=list_zip_toplevel_nondir
			method_pack=zip
			case "$f" in
				*.zip)	arch_basename="${f%.zip}"
					;;
				*)	arch_basename="$f"
					rule_suffix=' suffix='
					;;
			esac
			;;
		*) fatal "import_tree: $method_tree: unrecognized method"
			;;
	esac

	# Fetch archive list.
	local tree_list
	if ! tree_list="$($source_ls_cmd "$f")" ||
	   [ -z "$tree_list" ]; then
		# If archive is empty or corrupted, then store it as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	# Avoid unpacking archives with paths containing /../
	# ("grep -E" instead of the obsolescent "egrep", which prints a
	# warning with current GNU grep.)
	# dir_name becomes the sorted list of distinct toplevel path
	# components, one per line.
	local dir_name=
	printf %s "$tree_list" |LC_ALL=C grep -E -qs '^\.\.(/|$)|/\.\.(/|$)' ||
		dir_name="$(printf %s "$tree_list" |
			    tr -s / |			# squeeze slashes
			    sed 's|^/|.&|' |		# prefix leading slash with dot
			    sed 's|^\(\./\)\+||g' |	# strip leading "./"
			    sed -n 's|^\([^/]\+\)\(/.*\)\?$|\1|p' |
			    LC_COLLATE=C sort -u)"
	if [ -z "$dir_name" ]; then
		# If archive dirlist is empty, then store archive as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	local base= unpack_into_subdir=
	if [ "$dir_name" != "$(printf %s "$dir_name" |LC_COLLATE=C tr -d '[:space:]')" ] ||
	   [ -n "$(printf %s "$dir_name" |LC_COLLATE=C tr -d -- '-[:alnum:]_.,+')" ] ||
	   [ -n "$($source_ls_toplevel_nondir_cmd "$f")" ]; then
		# More than one subdir, funny dir name, or non-dir toplevel files.
		unpack_into_subdir=1
		dir_name="$arch_basename"
	else
		base="$dir_name"
	fi

	local subdir broken=
	subdir="$(mktemp -d subdir.XXXXXX)"

	case "$method_tree" in
		tar) tar -x -C "$subdir" -f "$f" ;;
		zip) unzip -q "$f" -d "$subdir" ;;
	esac || broken=1
	chmod -Rf u+rwX,go-w "$subdir" || broken=1

	if [ -z "$broken" ]; then
		# Add .gitattributes export-ignore file to each empty directory.
		echo '.gitattributes export-ignore' >"$tmpdir/export-ignore" &&
		find "$subdir" -type d -empty -exec ln -- "$tmpdir/export-ignore" '{}/.gitattributes' ';' ||
			broken=1
	fi

	if [ -n "$broken" ]; then
		# Archive is broken, store it as a file.
		rm -rf -- "$subdir"
		import_file 1 "$method_copy" "$f"
		return
	fi
	rm -- "$f"

	# Apply heuristics to shorten directory name.
	local short_name
	short_name="$(shorten_dir_name "$dir_name")"

	# Avoid file names clashes.
	if [ -e "$short_name" ] && [ "$dir_name" != "$arch_basename" ]; then
		# Try to shorten archive name.
		short_name="$(shorten_dir_name "$arch_basename")"
	fi

	if [ -e "$short_name" ]; then
		# Shorten heuristics failed, try with full directory name.
		short_name="$dir_name"
		if [ -e "$short_name" ] && [ "$dir_name" != "$arch_basename" ]; then
			# Try with full archive name.
			short_name="$arch_basename"
		fi
	fi

	if [ -e "$short_name" ]; then
		# Full name also clashes, so fall back to file name.
		# (The archive file itself has been removed above.)
		short_name="$f"
	fi

	# Move temporary directory to final place.
	if [ -n "$unpack_into_subdir" ]; then
		mv -- "$subdir" "$short_name"
	else
		mv -- "$subdir/$dir_name" "$short_name"
		rmdir "$subdir"
	fi
	dir_name="$short_name"

	local quoted rule_base= rule_name=
	quoted="$(quote_sed_regexp "$spec_version")"

	# name= and base= are written only when they differ from
	# "<dir>-<version>" and from the archive base name respectively.
	[ "$dir_name-$spec_version" = "$arch_basename" ] ||
		rule_name=" name=$arch_basename"

	[ "$arch_basename" = "$base" ] ||
		rule_base=" base=$base"

	# Occurrences of the spec version in the rule are replaced with the
	# @version@ keyword; with an empty version the substitution is a no-op.
	printf '%s: %s%s%s%s\n' "$method_pack" "$dir_name" "$rule_name" "$rule_base" "$rule_suffix" |
		sed "s/${quoted:-@version@}/@version@/g" >>"$gear_rules"

	git ls-files -z --others --modified -- "$dir_name" |
		git update-index --add ${verbose:+--verbose} -z --stdin
}

# Import one or more files and record the matching gear rule.
#
# Arguments:
#   $1  - non-empty to forbid looking inside compressed files;
#   $2  - method: copy, bzip2, gzip or xz;
#   $3… - file names (all of them for "copy", only the first otherwise).
#
# "copy" files are recorded as they are.  A compressed file that turns
# out to hold a tar archive is handed to import_tree.  Any other
# compressed file is uncompressed and recorded under its method when its
# suffix matches the method, the target name is free and the
# decompressor succeeds; otherwise it is recorded with a copy rule.
import_file()
{
	local no_unpack method
	no_unpack="$1"; shift
	method="$1"; shift

	if [ "$method" = copy ]; then
		write_copy_rule copy "$@"
		return
	fi

	local f
	f="$1"; shift

	# If archive unpacking is enabled, then
	# look for file type inside compressed file.
	local file_type=
	if [ -z "$no_unpack" ]; then
		file_type="$(file -bz -- "$f")" ||:
	fi

	case "$file_type" in
		*tar\ archive*)
			import_tree tar "$method" "$f"
			return
			;;
	esac

	# Pick the decompressor that belongs to this suffix/method pair.
	# gear copy rules do not support file name transformation, so
	# anything without a matching pair is copied as is.
	local ext= unpack_cmd=
	case "$f,$method" in
		*.bz2,bzip2) ext=.bz2 unpack_cmd=bunzip2 ;;
		*.gz,gzip) ext=.gz unpack_cmd=gunzip ;;
		*.xz,xz) ext=.xz unpack_cmd=unxz ;;
	esac

	# If target file exists or uncompression fails,
	# then just copy file as is.
	local unpacked="${f%$ext}"
	if [ -n "$unpack_cmd" ] && [ ! -e "$unpacked" ] && "$unpack_cmd" "$f"; then
		write_copy_rule "$method" "$unpacked"
	else
		write_copy_rule copy "$f"
	fi
}

git_commit_fast=
# Import one source package as a new commit and tag.
#
# Arguments:
#   $1 - path to the source rpm (resolved relative to saved_cwd).
#
# Variables read: saved_cwd, workdir, gear_rules, def_spec_pattern,
#                 index_orig, no_unpack, verbose, quiet.
# Variables written: spec_version, git_commit_fast (cached commit command).
#
# Calls fatal when the file does not look like a source rpm or when the
# commit fails; a failing "git tag" is tolerated.  The function returns
# with the current directory set to workdir.
import()
{
	local srpm="$1" && shift
	cd "$saved_cwd"
	verbose "Processing $srpm"
	srpm="$(readlink -ev -- "$srpm")"

	# The file must start with the bytes ed ab ee db, any two bytes, then
	# 00 01 (presumably the rpm lead magic followed by the source package
	# type), and all package queries must succeed.
	local header name verrel buildtime filelist changelogname changelog author_name author_email
	header="$(od -A n -N 8 -t x1 -- "$srpm")" &&
	[ -n "$header" ] && [ -z "${header## ed ab ee db ?? ?? 00 01}" ] &&
	name="$(rpmquery -p --qf '%{NAME}' -- "$srpm")" &&
	verrel="$(rpmquery -p --qf '%{VERSION}-%{RELEASE}' -- "$srpm")" &&
	buildtime="$(rpmquery -p --qf '%{BUILDTIME}' -- "$srpm")" &&
	changelogname="$(rpmquery -p --qf '%|changelogname?{%{changelogname}}|' -- "$srpm")" &&
	changelog="$(rpmquery -p --qf '%|CHANGELOGTEXT?{%{CHANGELOGTEXT}}|' -- "$srpm")" &&
	filelist="$(rpm2cpio "$srpm" |cpio --list --quiet )" ||
		fatal "$srpm: Not a valid source rpm package"

	# Derive commit author from the changelog entry header,
	# "Name <user@host>" or "Name <user at host>".
	author_name="$(printf '%s\n' "$changelogname" |
		sed -n 's/^\([^<]\+\)<[^@]\+\(@\| at \)[^@]\+>.*/\1/p' |
		sed -e 's/^[[:space:]]\+//' -e 's/[[:space:]]\+$//')"
	author_email="$(printf '%s\n' "$changelogname" |
		sed -n 's/^[^<]*<\([^@]\+\(@\| at \)[^@]\+\)>.*/\1/p' |
		sed -e 's/^[[:space:]]\+//' -e 's/[[:space:]]\+$//' |
		sed s/alt-linux/altlinux/g)"
	# Local, so that the author of one package is not carried over to
	# the next one.
	local GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL
	if [ -n "$author_name" ]; then
		GIT_AUTHOR_NAME="$author_name"
		export GIT_AUTHOR_NAME
	fi
	if [ -n "$author_email" ]; then
		GIT_AUTHOR_EMAIL="$author_email"
		export GIT_AUTHOR_EMAIL
	fi

	# Use own directory for checkout.
	rm -rf "$workdir"
	mkdir "$workdir"
	cd "$workdir"
	rpm2cpio "$srpm" |cpio --extract $([ -n "$verbose" ] || echo --quiet)
	verbose "Unpacked ${srpm##*/}"

	# Create the directory of the rules file when it has one.
	[ -n "${gear_rules##*/*}" ] ||
		mkdir -p -- "${gear_rules%/*}"

	# Find specfile.
	spec_version=
	local spec=
	if spec="$(find_specfile_in_cwd "${srpm##*/}: " ${filelist})"; then
		# A spec rule is needed only when the name does not match the
		# default pattern.
		[ -z "${spec##$def_spec_pattern}" ] || [ -z "${spec%%$def_spec_pattern}" ] ||
			printf 'spec: %s\n' "$spec" >>"$gear_rules"
		spec_version="$(sed '/^version:[[:space:]]*/I!d;s///;q' "$spec" |sed 's/[[:space:]]\+$//')"
	fi

	# Drop the index file; the entries below are added to a fresh one.
	rm -f "$index_orig"
	local f
	for f in ${filelist}; do
		if [ -L "$f" ] || [ ! -f "$f" ]; then
			message "$f: Non-regular file ignored."
			continue
		fi
		if [ "$f" = "$spec" ]; then
			git update-index --add ${verbose:+--verbose} -- "$f"
			continue
		fi
		if [ "$f" = "$gear_rules" ]; then
			message "$f: rules file $gear_rules ignored."
			continue
		fi
		# Dispatch on the detected file type.
		local file_type
		file_type="$(file -b -- "$f")"
		case "$file_type" in
			*tar\ archive*)
				import_tree tar copy "$f"
				;;
			*Zip\ archive*)
				import_tree zip copy "$f"
				;;
			bzip2\ compressed*)
				import_file "$no_unpack" bzip2 "$f"
				;;
			gzip\ compressed*)
				import_file "$no_unpack" gzip "$f"
				;;
			xz\ compressed*)
				import_file "$no_unpack" xz "$f"
				;;
			*)
				import_file "$no_unpack" copy "$f"
				;;
		esac
	done

	optimize_rules "$gear_rules"

	# NB: this variable shares its name with the message function;
	# variables and functions live in separate namespaces.
	local message
	message="$(printf '%s\n\n%s\n' "$verrel" "$changelog")"

	# Probe once whether "git commit" advertises a --fast option.
	# "grep -F" instead of the obsolescent "fgrep", which prints a warning
	# with current GNU grep.
	if [ -z "$git_commit_fast" ]; then
		git commit --h 2>&1 |grep -F -qs '[--fast]' &&
			git_commit_fast='git commit --fast' ||
			git_commit_fast='git commit'
	fi

	# Commit and tag under faketime, with the clock set to the package
	# build time.
	TZ=UTC faketime -d "1970-01-01 $buildtime seconds" -- \
		$git_commit_fast --no-verify ${quiet:+-q} -a -m "$message" ||
			fatal "Failed to commit $srpm"
	verbose "Committed $name $verrel"
	TZ=UTC faketime -d "1970-01-01 $buildtime seconds" -- \
		git tag -a -m "$name $verrel" "$verrel" ||:
	[ -n "${quiet-}" ] || message "Imported $srpm"
}

# Parse command line options.
TEMP="$(getopt -n "$PROG" -o h,q,v,V -l branch:,import-only,no-unpack,no-untar,rules:,spec-pattern:,stdin,help,quiet,verbose,version -- "$@")" ||
	show_usage
eval set -- "$TEMP"

branch_import=srpms
import_only=
no_unpack=
read_from_stdin=
gear_rules='.gear/rules'
while :; do
	case "$1" in
		--) shift; break
			;;
		--branch) shift; branch_import="$1"
			;;
		--import-only) import_only=1
			;;
		# --no-untar is accepted as an alias of --no-unpack.
		--no-unpack|--no-untar) no_unpack=1
			;;
		--rules) shift; gear_rules="$1"
			;;
		# spec_pattern is not used in this file; presumably it is
		# consumed by find_specfile_in_cwd.
		--spec-pattern) shift; spec_pattern="$1"
			;;
		--stdin) read_from_stdin=1
			;;
		-h|--help) show_help
			;;
		-q|--quiet) quiet=-q; verbose=
			;;
		-v|--verbose) verbose=-v; quiet=
			;;
		-V|--version) print_version
			;;
		*) fatal "Unrecognized option: $1"
			;;
	esac
	shift
done

if [ -n "$read_from_stdin" ]; then
	# No arguments, please.
	[ "$#" -eq 0 ] ||
		show_usage 'Too many arguments.'
else
	# At least one argument, please.
	[ "$#" -ge 1 ] ||
		show_usage 'Not enough arguments.'
fi

# Use an absolute GIT_DIR, since import changes the current directory.
GIT_DIR="$(git rev-parse --git-dir)"
GIT_DIR="$(readlink -ev -- "$GIT_DIR")"
export GIT_DIR

# Remember where HEAD points; head_orig is empty when the current branch
# has no commits yet.
branch_orig="$(git symbolic-ref HEAD)"
head_orig="$(git rev-parse --verify HEAD 2>/dev/null ||:)"

unset GIT_INDEX_FILE ||:
index_orig="$GIT_DIR/index"
index_already_recovered=

# Change to toplevel directory
chdir_to_toplevel

# Save current work directory.
saved_cwd="$(/bin/pwd)"

install_cleanup_handler cleanup_handler
tmpdir="$(mktemp -dt "$PROG.XXXXXXXX")"
workdir="$tmpdir/work"

# Backup index file.
[ ! -f "$index_orig" ] ||
	cp -p "$index_orig" "$tmpdir/index"

# Commits made by import go to the import branch.
git symbolic-ref HEAD "refs/heads/$branch_import"

if [ -n "$read_from_stdin" ]; then
	# -r: take package names literally, without backslash processing.
	# Leading and trailing whitespace is still trimmed, as before.
	while read -r REPLY; do
		import "$REPLY"
	done
else
	for REPLY; do
		import "$REPLY"
	done
fi

# Merge after import.
cd "$saved_cwd"
head_new="$(git rev-parse --verify HEAD 2>/dev/null ||:)"
if [ "$head_orig" = "$head_new" ]; then
	verbose "Nothing imported."
elif [ -z "$import_only" ]; then
	if [ -z "$head_orig" ]; then
		# The original branch had no commits: start it at the
		# import branch head.
		git update-ref "$branch_orig" "refs/heads/$branch_import"
		message "Created ${branch_orig#refs/heads/} branch"
		git symbolic-ref HEAD "$branch_orig"
		git checkout -f
		# The checkout has rewritten the index; cleanup must not
		# put the backup over it.
		index_already_recovered=1
	elif [ -n "$head_new" ]; then
		git symbolic-ref HEAD "$branch_orig"
		git checkout -f
		index_already_recovered=1
		if [ "$branch_orig" != "refs/heads/$branch_import" ]; then
			git pull ${quiet:+-n} . "refs/heads/$branch_import"
			message "Merged $branch_import branch into ${branch_orig#refs/heads/} branch"
		fi
	fi
fi
