#!/bin/sh -ef
#
# Copyright (C) 2003-2008  Dmitry V. Levin <ldv@altlinux.org>
# Copyright (C) 2007,2009  Alexey Tourbin <at@altlinux.org>
# 
# Contents index routines for hsh-initroot
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

# Contents index maps filenames into package names.  Currently, we use it
# to make dependencies on executable commands (such as those found in shell scripts).
#
# Contents index is created as follows.
#
# 1) The list of sources.list components, called parts_list, is initialized.
# Currently, two types of sources.list components are supported: traditional
# pkglist-based repositories and newer rpm-dir local repositories.
#
# 2) For each sources.list component, its contents part is created: pkglist
# files are processed with pkglist-query, and rpm-dir local repositories are
# scanned with rpmquery.
#
# 3) Contents parts are merged into a single file called contents_index_bin.
# During the merge, duplicate filename entries (which map to different package
# names) are folded into a single entry, to produce raw file-level dependencies.
#
# Several optimizations are performed to avoid extra work.
# This flag records whether the final merge is required: it stays empty
# while the existing contents index can be reused, and is set to a
# non-empty value as soon as a re-merge becomes necessary.
regen_contents=''

# Create tmpdir/parts_list from the apt cache dump.
#
# Each "File: /..." line of the dump yields at most one entry:
#  - a path ending with "/" is an rpm-dir and is recorded by its full path;
#  - otherwise only the basename is considered, and it is recorded if it
#    contains "_pkglist".
# Sets regen_contents=1 when the new list differs from the installed
# $contents_dir/parts_list, because the contents index must then be re-merged.
init_contents_state()
{
	local new_list="$tmpdir"/parts_list

	"$aptbox"/apt-cache -q --no-generate -o APT::Cache::DumpPackages=false dump |
		LC_ALL=C sed -n '
			/^File: \//{
				s/^File: //
				/\/$/p
				s/.*\///
				/_pkglist/p
			}' |
		LC_ALL=C sort -u >"$new_list"

	if [ ! -s "$new_list" ]; then
		fatal 'Failed to initialize contents state.'
	fi

	# Only bother comparing when a merge has not been requested yet.
	if [ -z "$regen_contents" ] &&
	   ! cmp -s "$contents_dir"/parts_list "$new_list"; then
		regen_contents=1
	fi

	verbose 'Initialized contents state.'
}

# Colon-separated list of directories covered by the contents index.
# The order is significant: index entries are emitted directory by directory
# in exactly this order.
contents_index_bin_dirs="\
/bin:\
/sbin:\
/usr/bin:\
/usr/sbin:\
/usr/X11R6/bin:\
/usr/games:\
/usr/lib/kde4/bin:\
/usr/lib/kde4bin"

# Create and install contents part.
#
# $1 - parts_list entry: either the basename of a pkglist file located in
#      $pkglists_dir, or the full path of an rpm-dir (it contains a slash).
#
# Reads $pkglists_dir, $parts_dir, $tmpdir and $contents_index_bin_dirs.
# The part is installed into $parts_dir (for an rpm-dir, slashes in the path
# are replaced with underscores) and is given the timestamp of its source.
# Sets regen_contents=1 if the new part differs from the installed one.
create_contents_part()
{
	local part="$1"; shift
	local tab; tab=$(printf '\t')

	# Some files, such as alternatives, are not packaged explicitly,
	# but instead are provided via Provides.  They are handled specially.

	# Since pkglist files do not provide SHA1HEADER, we use NVRA to identify a package.
	local NVRA='%{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}'
	# Packaged files are usually mapped to package names.
	local Q1="[%{FILENAMES}\t%{NAME}\t$NVRA\n]"
	# Alternatives-like virtual paths should map to themselves.
	local Q2="[%{PROVIDENAME}\t%{PROVIDENAME}\t$NVRA\n]"

	# Prepare extended regular expression for 'grep -E'
	# (egrep is obsolescent and newer GNU grep warns about it).
	# We use grep because it seems to be the fastest tool.
	# And our special "leading slash" trick makes it even faster.
	local dirs_or; dirs_or=$(printf %s "$contents_index_bin_dirs" |sed 's@^/@@;s@:/@|@g')
	local RE="^/($dirs_or)/[^/]+$tab"

	# An entry without a slash is a pkglist basename; anything else is
	# an rpm-dir path, whose part file name has slashes mapped to '_'.
	local ref dir=
	if [ -n "${part##*/*}" ]; then
		ref=$pkglists_dir/$part
	else
		dir=$part
		ref=$part
		part=$(printf %s "$part" |tr / _)
	fi

	# The producer runs in a pipeline subshell, so 'cd' does not leak.
	# grep exit status 1 only means "no matching lines", which is fine.
	if [ -n "$dir" ]; then
		cd "$dir" || exit
		find . -mindepth 1 -maxdepth 1 -name '*.rpm' -exec \
			rpmquery --qf "$Q1$Q2" -p -- '{}' '+'
	else
		pkglist-query "$Q1$Q2" "$ref"
	fi |
		LC_ALL=C grep -E "$RE" >"$tmpdir"/part || [ $? -eq 1 ]

	if [ -s "$tmpdir"/part ]; then

		# Files kill provides, except for virtual paths: for each
		# (filename, NVRA) pair only the first record is kept, and
		# packaged files are listed before provides.
		LC_ALL=C sort -t"$tab" -u -k1,1 -k3,3 -o "$tmpdir"/part "$tmpdir"/part

		# Discard NVRA.
		cut -f1,2 <"$tmpdir"/part >"$tmpdir"/part+
		mv -f "$tmpdir"/part+ "$tmpdir"/part

		# Fold identical records and prepare for later 'sort -m'.
		LC_ALL=C sort -u -o "$tmpdir"/part "$tmpdir"/part
	fi

	# The part has been created from scratch, but it is still rather
	# possible that there is no actual change, which can save us a merge.
	[ -n "$regen_contents" ] || cmp -s "$parts_dir/$part" "$tmpdir"/part || regen_contents=1

	# Bless and install the part.
	touch -r "$ref" "$tmpdir"/part
	mv -f "$tmpdir"/part "$parts_dir/$part"

	if [ -s "$parts_dir/$part" ]; then
		verbose "Created contents part for ${dir:-$part}."
	else
		verbose "Created empty contents part for ${dir:-$part}."
	fi
}

# Check that a contents file carries exactly the timestamp of its reference.
# $1 - reference (must exist; may be of any file type)
# $2 - contents file (must be a regular file)
# Returns 0 if $1 is neither newer nor older than $2, and 1 otherwise.
contents_match_timestamp()
{
	local ref="$1" contents="$2"

	if [ ! -e "$ref" ] || [ ! -f "$contents" ]; then
		return 1
	fi
	if [ "$ref" -nt "$contents" ] || [ "$ref" -ot "$contents" ]; then
		return 1
	fi
	return 0
}

# Tell whether the installed contents part for a parts_list entry is current.
# $1 - parts_list entry: a pkglist basename, or an rpm-dir path
# Returns 0 (after a verbose note) when the part file in $parts_dir has the
# same timestamp as its source, and 1 otherwise.
valid_contents_part()
{
	local entry="$1"; shift
	local source_ref part_file

	case "$entry" in
		''|*/*)
			# rpm-dir: the directory itself is the reference; the part
			# file is named after the path with slashes turned into '_'.
			source_ref=$entry
			part_file=$parts_dir/$(printf %s "$entry" |tr / _)
			;;
		*)
			# pkglist: the reference file lives in $pkglists_dir.
			source_ref=$pkglists_dir/$entry
			part_file=$parts_dir/$entry
			;;
	esac

	contents_match_timestamp "$source_ref" "$part_file" || return 1
	verbose "Contents part for $entry is up to date."
}

# Merge contents parts into tmpdir/contents_index_bin.
#
# Reads $tmpdir/parts_list to locate the part files in $parts_dir, and
# $contents_index_bin_dirs for the order in which directories are emitted.
# The resulting index gets the timestamp of $tmpdir/parts_list.
merge_contents_parts()
{
	# Prepare argv for 'sort -m'.
	local part
	set --
	while read -r part; do
		# rpm-dir parts are stored under their path with '/' mapped to '_'.
		[ -n "${part##*/*}" ] ||
			part=$(printf %s "$part" |tr / _)
		set -- "$@" "$parts_dir/$part"
	done <"$tmpdir"/parts_list
	[ $# -gt 0 ] ||
		fatal 'No contents parts to merge.'

	# Note that parts are sorted in ascii order, so that 'sort -m' is possible.
	# The merged stream is the same for every directory, so it is produced
	# only once; this also lets a failed merge be detected.
	LC_ALL=C sort -m -u -o "$tmpdir"/merged "$@" ||
		fatal 'Failed to merge contents parts.'

	# However, for contents index, directories should be reordered.
	local dir
	for dir in $(IFS=:; echo $contents_index_bin_dirs); do
		# grep exit status 1 only means "no entries for this directory".
		LC_ALL=C grep "^$dir/" <"$tmpdir"/merged >"$tmpdir"/dir || [ $? -eq 1 ]
		if [ ! -s "$tmpdir"/dir ]; then
			verbose "No contents index entries for $dir."
			continue
		fi
		# Fold dups: change adjacent lines
		#	/usr/bin/r	pkgA
		#	/usr/bin/r	pkgB
		# into
		#	/usr/bin/r	/usr/bin/r
		# This will also exclude filenames with spaces.
		LC_ALL=C awk <"$tmpdir"/dir >>"$tmpdir"/contents_index_bin '
			NF==2 {
				if ($1==f)
					p = f
				else {
					if (f)
						print f "\t" p
					f = $1
					p = $2
				}
			}
			END {
				if (f)
					print f "\t" p
			}'
		verbose "Added contents index entries for $dir."
	done
	rm -f "$tmpdir"/merged

	[ -s "$tmpdir"/contents_index_bin ] ||
		fatal 'Created empty contents index.'

	touch -r "$tmpdir"/parts_list "$tmpdir"/contents_index_bin
	verbose 'Created contents index.'
}

# Copy file, use cp -l if possible
#
# $1 - source file
# $2 - destination file (with or without a directory component)
#
# A hard link is attempted when the source and the destination directory
# report the same device number.  Equal device numbers do not guarantee that
# linking works (e.g. across different mount points of one filesystem), so
# a failed link attempt falls back to a regular copy.
# $verbose is passed to cp as an optional flag; cp output goes to stderr.
cp_l_file_from_to()
{
	local from="$1"; shift
	local to="$1"; shift
	local to_dir dev_from dev_to

	# A destination without a slash lives in the current directory.
	case "$to" in
		*/*) to_dir="${to%/*}/" ;;
		*) to_dir=./ ;;
	esac

	dev_from="$(stat -c '%d' -- "$from")"
	dev_to="$(stat -c '%d' -- "$to_dir")"

	# Redirection order matters: cp's stdout (verbose output) goes to our
	# stderr, while its own error messages are discarded because a failed
	# link attempt is followed by a regular copy.
	if [ "$dev_from" = "$dev_to" ] &&
	   cp -f -l $verbose -- "$from" "$to" >&2 2>/dev/null; then
		return 0
	fi
	cp -f $verbose -- "$from" "$to" >&2
}

# Entry point: build contents_index_bin from scratch and install it.
# Nothing from the cache is trusted: every contents part is recreated
# and the merge is always performed.
# Requires $cache_dir; sets the global contents_index_bin to the path of
# the installed copy.
create_contents()
{
	local contents_dir="${cache_dir:?}"/contents
	local tmpdir="$contents_dir"/tmp
	local parts_dir="$contents_dir"/parts

	# Start with an empty scratch directory.  mkdir is used without -p
	# on purpose: it fails if $contents_dir does not exist.
	rm -rf "$tmpdir"
	mkdir "$tmpdir"

	mkdir -p $verbose "$parts_dir" >&2

	# pkglist files are looked up in apt's lists directory,
	# which has to be an absolute path of an existing directory.
	local pkglists_dir=
	get_apt_config Dir::State::lists/d pkglists_dir
	if [ -n "${pkglists_dir##/*}" ] || [ ! -d "$pkglists_dir" ]; then
		fatal "apt-config: broken Dir::State::lists: $pkglists_dir."
	fi

	# The merge is unconditional here, so the change detection
	# done by the helpers is switched off.
	regen_contents=1

	init_contents_state </dev/null

	local part
	while read -r part; do
		create_contents_part "$part" </dev/null
	done <"$tmpdir"/parts_list

	merge_contents_parts </dev/null

	# Publish the new state in the cache and drop the scratch directory.
	mv -f "$tmpdir"/parts_list "$contents_dir"/parts_list
	mv -f "$tmpdir"/contents_index_bin "$contents_dir"/contents_index_bin
	rm -rf "$tmpdir"

	# Install the index.
	cp_l_file_from_to "$contents_dir"/contents_index_bin chroot/.host/contents_index_bin
	contents_index_bin=chroot/.host/contents_index_bin
}

# Entry point: bring contents_index_bin up to date and install it.
# Cached data is validated first: parts whose timestamps still match their
# sources are kept, the others are recreated, and the merge is performed
# only if something has actually changed.
# Requires $cache_dir; sets the global contents_index_bin to the path of
# the installed copy.
update_contents()
{
	local contents_dir="${cache_dir:?}"/contents
	local tmpdir="$contents_dir"/tmp
	local parts_dir="$contents_dir"/parts

	# Start with an empty scratch directory.  mkdir is used without -p
	# on purpose: it fails if $contents_dir does not exist.
	rm -rf "$tmpdir"
	mkdir "$tmpdir"

	mkdir -p $verbose "$parts_dir" >&2

	# pkglist files are looked up in apt's lists directory,
	# which has to be an absolute path of an existing directory.
	local pkglists_dir=
	get_apt_config Dir::State::lists/d pkglists_dir
	if [ -n "${pkglists_dir##/*}" ] || [ ! -d "$pkglists_dir" ]; then
		fatal "apt-config: broken Dir::State::lists: $pkglists_dir."
	fi

	# The cached index is reusable only if it is non-empty and carries
	# the timestamp of the cached parts_list.
	if [ ! -s "$contents_dir"/contents_index_bin ] ||
	   ! contents_match_timestamp "$contents_dir"/parts_list "$contents_dir"/contents_index_bin; then
		regen_contents=1
	fi

	init_contents_state </dev/null

	# Recreate only those parts which fail validation.
	local part
	while read -r part; do
		if ! valid_contents_part "$part"; then
			create_contents_part "$part" </dev/null
		fi
	done <"$tmpdir"/parts_list

	if [ -n "$regen_contents" ]; then
		merge_contents_parts </dev/null
		mv -f "$tmpdir"/parts_list "$contents_dir"/parts_list
		mv -f "$tmpdir"/contents_index_bin "$contents_dir"/contents_index_bin
	else
		verbose 'Contents index is up to date.'
	fi

	rm -rf "$tmpdir"

	# Install the index.
	cp_l_file_from_to "$contents_dir"/contents_index_bin chroot/.host/contents_index_bin
	contents_index_bin=chroot/.host/contents_index_bin
}
