#!/usr/bin/env bash
# Read the codepoints and format into blocks.
# Copyright © 2012 Ken Moffat
# covered by the MIT license, http://opensource.org/licenses/MIT

# The included names and ranges of the blocks are copyright
# 1991-2015 The Unicode Consortium
# http://www.unicode.org/copyright.html


# Note that ALL possible planes are listed - CODE2001 contains some
# private use codes in planes E and F : doing this has slowed the
# program down.

# Uses decimal for the maths, to avoid the runtime overhead of either
# loading bc to calculate the next value in the range, or of using a
# double printf (hex->dec, maths, dec->hex), because that needs sed to
# convert lowercase hex to uppercase (or tr, which is even slower).

# Exactly one argument is required: the path to the codepoints file.
# The usage text is a diagnostic, so it goes to stderr and cannot end
# up in a redirected report.
if [ $# -ne 1 ]; then
	echo "Usage: $0 /path/to/font.codepoints" >&2
	exit 1
fi

# The files contain U+.... (4-digit) and U+..... (5-digit) codes in
# alphabetic order.  Any code that is not 4 digits long is written to
# a temp file, and that file is processed after the basic plane.
SUPP_PLANE=
# Use a uniquely named temp file: a fixed name in /tmp can be clobbered
# by a concurrent run or pre-created by another user.  mktemp creates
# the file empty, so no separate truncation is needed.
EXTENDED=$(mktemp /tmp/supp-planes.XXXXXX) || {
	echo "cannot create temporary file" >&2
	exit 1
}
# remove the temp file when the script exits, even if it is cut short
trap 'rm -f -- "$EXTENDED"' EXIT

# Range store:
#   FROM - hex code that starts the range being accumulated
#   TO   - hex code that currently ends it; empty means no range in store
#   NEXT - decimal value that would continue the range ($DECIMAL+1)
FROM=''
TO=''
NEXT=''

# decimal value of the hex code currently being processed
DECIMAL=''

# index of the block being reported; starts below the first index
BLOCK=-1

# block index worked out for the current codepoint
NEWBLOCK=''

# The blocks file should be in the same directory as this script.
# When $0 has no directory part (e.g. run as "bash scriptname"),
# ${0%/*} would return the script name itself, so fall back to ".".
case $0 in
	*/*) SCRIPTS=${0%/*} ;;
	*)   SCRIPTS=. ;;
esac

if ! [ -r "${SCRIPTS}/unicode-blocks" ]; then
	echo "cannot read ${SCRIPTS}/unicode-blocks, oh dear" >&2
	# the error function is only defined further down the file, so it
	# cannot be called from here: clean up and stop inline instead
	rm -f -- "$EXTENDED"
	exit 1
fi
. "${SCRIPTS}/unicode-blocks"

# functions

error () {
	# Abort the script: remove the temp file named by $EXTENDED, then
	# exit with status 1.
	# The path is quoted so it is never word-split, and -f keeps rm
	# quiet if the file has already gone.
	rm -f -- "$EXTENDED"
	exit 1
}

store () {
	# The current code ($UNICODE) becomes the end of the stored range.
	TO="$UNICODE"
	# Remember the decimal value that would extend the run by one.
	NEXT=$(( DECIMAL + 1 ))
}

flush () {
	# Write the stored range on its own line, then empty the store:
	#   a single code is written as   U+FROM
	#   a run of codes is written as  U+FROM-U+TO
	# Clears TO; FROM is left for the caller to reset.
	# With nothing in the store (FROM or TO empty) there is nothing to
	# write.  This happens when the input held no code for this pass.
	if [ -z "$FROM" ] || [ -z "$TO" ]; then
		TO=
		return 0
	fi
	if [ "$FROM" = "$TO" ]; then
		echo "U+${FROM}"
	else
		echo "U+${FROM}-U+${TO}"
	fi
	TO=
}

decimal () {
	# Set DECIMAL to the decimal value of the hex code in $UNICODE, for
	# shell maths.  Any arguments are ignored: the value is taken from
	# the global.
	# printf -v assigns directly, so no subshell is forked per codepoint.
	if ! printf -v DECIMAL '%d' "0x${UNICODE}"; then
		# not a valid hex number: report it on stderr so the listing on
		# stdout stays clean, and carry on with 0
		echo "debug: unicode is $UNICODE" >&2
		DECIMAL=0
	fi
}

title () {
	# Print the name of block number $BLOCK (from the blocknames array)
	# with a blank line above and below it.
	# The name is quoted so it is printed exactly as stored, with no
	# word splitting or filename expansion.
	echo
	printf '%s\n' "${blocknames[$BLOCK]}"
	echo
}

check_block () {
	# Find the block index for the code whose decimal value is in
	# $DECIMAL.  The search starts at the current $BLOCK and only moves
	# forward, stopping at the first block whose blockends entry is
	# greater than $DECIMAL, or when $MAXBLOCK is reached.
	# If the block changed: flush any stored range, update BLOCK, print
	# the NEW title and empty the range start.

	NEWBLOCK=$BLOCK
	while [ "$NEWBLOCK" -lt "$MAXBLOCK" ]; do
		# a negative index has no table entry: just step on to the next
		if (( NEWBLOCK >= 0 )); then
			# the arithmetic turns the table entry into a plain number
			UPPER=$(( ${blockends[$NEWBLOCK]} + 0 ))
			if (( DECIMAL < UPPER )); then
				break
			fi
		fi
		NEWBLOCK=$(( NEWBLOCK + 1 ))
	done

	# still in the same block: nothing more to do
	if [ "$NEWBLOCK" = "$BLOCK" ]; then
		return 0
	fi

	# not the first time - write out the range still in the store
	if [ -n "$FROM" ]; then
		flush
	fi
	BLOCK=$NEWBLOCK
	# heading for the block that has just started
	title
	# start the new range
	FROM=
}

do_glyph () {
	# Process the code in $UNICODE: extend the stored range, or flush
	# it and start a new one, as appropriate.

	# get the decimal value of this code, then see which block it is in
	# (check_block flushes and empties FROM if the block has changed)
	decimal "$UNICODE"
	check_block

	# the code follows straight on from the stored range: extend it
	if [ -n "$FROM" ] && [ "$DECIMAL" = "$NEXT" ]; then
		store
		return
	fi

	# Either the store is empty or there is a gap.  For a gap, flush
	# what we have - unless the block change already emptied the store.
	if [ -n "$FROM" ] && [ -n "$TO" ]; then
		flush
	fi
	FROM=$UNICODE
	store
}
# main line starts here

# Stop if the codepoints file cannot be read; error removes the temp
# file and exits 1.
if ! [ -r "$1" ]; then
	echo "cannot read $1" >&2
	error
fi

# Font name for the heading: drop the directory first and only then the
# last extension, so a dot in a directory name (or a leading "./")
# cannot swallow the file name.
FONT=${1##*/}
FONT=${FONT%.*}
echo "code points supported by $FONT"

# First pass: the basic multilingual plane (4-digit codes).
# The first field of each line is the code; the "U+" prefix is removed
# with shell expansions, so no extra process is started per line.
# The "|| [ -n ... ]" picks up a last line that has no newline.
while read -r UNICODE _ || [ -n "$UNICODE" ]; do
	UNICODE=${UNICODE/U+/}
	# a blank line carries no code: skip it
	if [ -z "$UNICODE" ]; then
		continue
	fi
	if [ "${#UNICODE}" -eq 4 ]; then
		# basic multilingual plane, do it
		do_glyph
	else
		# supplemental plane:
		# this might come DURING a range, e.g. U+1AEF U+1AEF0 U+1AF0
		# so do NOT flush any range
		SUPP_PLANE=true
		printf '%s\n' "$UNICODE" >>"$EXTENDED"
	fi
done <"$1"
# write out the last range, if this pass stored one
if [ -n "$TO" ]; then
	flush
fi

# Second pass: the codes that were set aside in the temp file.
if [ -n "$SUPP_PLANE" ]; then
	FROM=
	TO=
	NEXT=
	while read -r UNICODE _ || [ -n "$UNICODE" ]; do
		do_glyph
	done <"$EXTENDED"
	if [ -n "$TO" ]; then
		flush
	fi
fi
# clean up
rm -f -- "$EXTENDED"
