# utf8 fixes: rewrite typographic Unicode characters as plain ASCII text.
# Every rule uses the g flag, so all occurrences on a line are replaced.
# NOTE(review): the quote rules below use a close-escape-reopen sequence,
# which suggests this script sits inside a single-quoted shell string.
# If so, keep apostrophes out of any comment added here -- TODO confirm.
#
# Ligatures.
# NOTE(review): as displayed, pattern and replacement of these four rules
# are identical (a no-op); presumably the patterns were single ligature
# code points in the raw file -- verify the bytes before editing.
s/fi/fi/g
s/fl/fl/g
+s/ff/ff/g
+s/ffi/ffi/g
# Section sign becomes the two-character marker "!S"; copyright sign
# becomes "(C)".
s/§/!S/g
s/©/(C)/g
# Dashes: em dash becomes two hyphens; minus sign and en dash become one.
s/—/--/g
s/−/-/g
+s/–/-/g
# Math and markup symbols mapped to their closest ASCII look-alikes.
s/∗/*/g
s/ˆ/^/g
# NOTE(review): only the left angle bracket is handled in the visible rules;
# no rule for a closing bracket appears in this chunk.
s/〈/</g
s/→/->/g
# Curly single quotes become a plain ASCII single quote (written with the
# shell quote-escape sequence).
s/‘/'\''/g
s/’/'\''/g
# Curly double quotes become a plain ASCII double quote.
+s/“/"/g
+s/”/"/g
# Tilde operator becomes an ASCII tilde.
+s/∼/~/g
# pdftotext layout fixes
# Join two underscores separated by a single space.
# NOTE(review): matches do not overlap, so "_ _ _" becomes "__ _" in one
# pass; a longer spaced run is not fully collapsed -- confirm whether that
# matters for the input.
s/_ _/__/g
# floats are sometimes broken
nl=nl "\n"
next
}
+# TODO: shift page numbers
+#function inc(x) {
+# if (x ~ /[0-9]/)
+# return x+1
+# if (sub(/viii$/,"ix",x) ||
+# sub(/iii$/,"iv",x) ||
+# sub(/iv$/,"v",x) ||
+# sub(/ix$/,"x",x))
+# return x
+# return x "i"
+#}
/^\(newpage\)/ {
n=split(last,a)
if(side)
else
p=a[n]
side=!side
+# if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
+# p=0
+# print "\n[page " inc(p) "]"
print "\n[page " p "]"
getline
getline