#!/bin/bash
# Get a list of the unicode points in a TTF or OTF font.
# Copyright © 2012, 2013 Ken Moffat
# covered by the MIT license, http://opensource.org/licenses/MIT
#
# second version, using fret [ original version sometimes misses glyphs ]
# for ttf files, but the original code for otf files
# third version : using fret also misses glyphs, e.g. in Charis-SIL,
# so try BOTH approaches for ttf fonts!

# usage: print the command-line help on stderr and terminate the script.
# Never returns to the caller - always exits with status 1.
usage() {
	# One printf call; the '%s\n' format is reused for every argument,
	# so each string below becomes one line of the help text.
	printf '%s\n' \
		"list the glyphs within a ttf or otf file" \
		"usage $0 /path/to/a/fontfile [outfile]" \
		"output is to stdout if no outfile is specified" \
		"the font file can be TrueType or OpenType" >&2
	exit 1
}

# ---------------------------------------------------------------------------
# Main script body.
#
# Arguments:
#   $1 - path to the font file (TrueType or OpenType)
#   $2 - optional output file; without it the list goes to stdout
# Output:
#   a sorted, de-duplicated list of "U+xxxx" code points, one per line;
#   progress and error messages go to stderr only
# Exit status:
#   1 for bad arguments, a missing file, a file that is not a font, or a
#   failure to create the scratch directory; otherwise the status of the
#   final sort
# ---------------------------------------------------------------------------
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
	usage
fi

if ! [ -f "$1" ]; then
	printf '%s\n' "$1 : No such file" >&2
	usage
fi

printf '%s' "checking input font type " >&2
# -b (brief) leaves the filename out of the description, so a file whose
# NAME happens to contain "Type" or "TrueType" cannot be misdetected.
TYPE=$(file -b "$1")
case "$TYPE" in
	*TrueType*)
		IS_TRUETYPE=yes
		;;
	*Type*)
		# "Type" without "TrueType": treated as OpenType
		IS_TRUETYPE=no
		;;
	*)
		# an error: report it on stderr and fail, so that stdout only
		# ever carries the glyph list and callers can test the status
		printf '%s\n' "not a TrueType or OpenType font" >&2
		exit 1
		;;
esac
printf '%s\n' "$TYPE" >&2

# All intermediate files live in a private scratch directory which is
# removed on every exit path (normal end, error, or interrupt).
WORKDIR=$(mktemp -d) || {
	printf '%s\n' "cannot create a temporary directory" >&2
	exit 1
}
trap 'rm -rf -- "$WORKDIR"' EXIT

# TMP2 stays empty unless the fret pass runs.  Setting it explicitly also
# stops a TMP2 inherited from the environment from being picked up.
TMP2=
if [ "$IS_TRUETYPE" = yes ]; then
	# TrueType - use fret to generate a PDF - unfortunately, it cannot write to stdout
	TMP="$WORKDIR/fret.pdf"
	TMP2="$WORKDIR/fret.txt"
	echo "using fret" >&2
	# Best effort: fret's messages are discarded on purpose, and if it
	# fails this pass simply contributes nothing to the result.
	fret "$1" "$TMP" 2>/dev/null

	# convert to text, pipe to stdout and extract the U+.... glyph values
	# in grep, + appears to be treated as special unless -G is forced on
	# for some fret PDFs, pdftotext appears to insert '|' in front of U+
	# Also, everything that looks like U+nnnn comes through twice, about
	# 40 lines apart, then gets sorted together.
	pdftotext "$TMP" - 2>/dev/null | sed -e 's/|U/U/' | grep '^U+' | \
	 awk '{ print $1 }' | sort --unique >"$TMP2"
fi

# Do this for both TrueType and OpenType
# use my old method : on _some_ fonts (so far, one non-redistributable ttf)
# this doesn't find all the glyphs, but fret cannot handle otf files
TMP3="$WORKDIR/ttf2config.txt"
echo "using ttf2config.pl" >&2
ttf2config.pl "$1" | grep UID= | cut -d ' ' -f 3 | cut -d "'" -f2 | \
 awk '{ print "U+" $1 }' | sort >"$TMP3"

# use one or both results
if [ -n "$TMP2" ]; then
	RESULTS=("$TMP2" "$TMP3")
else
	RESULTS=("$TMP3")
fi

# Merge and de-duplicate.  The redirection is written out directly rather
# than built into a string and passed to eval, so an output name with
# spaces or shell metacharacters is used literally.
if [ $# -eq 2 ]; then
	sort --unique "${RESULTS[@]}" >"$2"
else
	sort --unique "${RESULTS[@]}"
fi

