3 # assumes a UTF-8 locale
4 # remove non-ASCII characters from the output of pdftotext -layout standard.pdf
48 # pdftotext layout fixes
50 # floats are sometimes broken by a space after the decimal point, e.g. "1. 5"; rejoin them
51 s/\([0-9]\)\. \([0-9]\)/\1.\2/g
52 ' | LC_ALL=C tr -c '\n-~' '?' | awk '
69 print "\n[page " p "]"