s/≥/>=/g
s/≠/!=/g
s/Σ/(Sum)/g
-s/√/sqrt:/g
+s/√/(sqrt)/g
s/π/pi/g
s/∞/(inf)/g
s/ƒ/fl./g
s/⎯/-/g
s/↑/(uparrow)/g
s/↓/(downarrow)/g
-s/↔/(<->)/g
-s/→/(->)/g
+s/↔/<->/g
+s/→/->/g
s/‘/'\''/g
s/’/'\''/g
# pdftotext layout fixes
s/_ _/__/g
-s/\([0-9]\). \([0-9]\)/\1.\2/g
+# pdftotext sometimes puts a space after the decimal point ("1. 5"); rejoin such numbers
+s/\([0-9]\)\. \([0-9]\)/\1.\2/g
' | LC_ALL=C tr -c '\n-~' '?' | awk '
+# Prime the one-line buffer: read the first input line into `last`
+# without printing it, and start with side=0 (side is toggled at every
+# "(newpage)" marker to choose which field holds the page number).
BEGIN {
getline
last=$0
side=0
}
+# Blank input line: do not buffer or print it here. Queue one newline
+# in `nl` instead; the catch-all rule prepends the queued newlines to
+# the next line it buffers.
+length($0) == 0 {
+ nl = nl "\n"
+ next
+}
+# TODO: shift page numbers by one; inc() below is a disabled draft that increments an arabic or lowercase roman page number
+#function inc(x) {
+# if (x ~ /[0-9]/)
+# return x+1
+# if (sub(/viii$/,"ix",x) ||
+# sub(/iii$/,"iv",x) ||
+# sub(/iv$/,"v",x) ||
+# sub(/ix$/,"x",x))
+# return x
+# return x "i"
+#}
+# Page-break rule: runs for every line starting with "(newpage)".
+# `last` is the buffered previous line, which has not been printed yet
+# and is dropped here. The page number is taken from its first
+# whitespace-separated field when side is set and from its last field
+# otherwise; side flips on every page break (presumably the number sits
+# on alternating edges of facing pages -- TODO confirm).
/^\(newpage\)/ {
n=split(last,a)
if(side)
+ p=a[1]
else
p=a[n]
side=!side
- print "[page " p "]"
+# if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
+# p=0
+# print "\n[page " inc(p) "]"
+ print "\n[page " p "]"
+# Read two more lines: the first is discarded, the second becomes the
+# new buffered line (and is still seen by the catch-all rule below).
getline
getline
last=$0
}
+# Catch-all rule: output lags input by one line. Print the buffered
+# line, then buffer the current line prefixed by the newlines queued in
+# `nl` for blank lines skipped since the last buffered line, and reset
+# the queue.
+# NOTE(review): no END rule is visible, so the final buffered line is
+# never printed -- confirm this is intended.
{
print last
- last=$0
-}
-'
+ last=nl $0
+ nl=""
+}'