X-Git-Url: http://nsz.repo.hu/git/?p=c-standard;a=blobdiff_plain;f=translit.sh;h=dce2b08d30e21a7a42df683c22f1a2d53b424d45;hp=0bf85a05a3c524d549f0c71c11702cdb5c4510fd;hb=0b7a1b9887c6d0b4bce83b50a62f872245a303b7;hpb=12582873cbe57925728ccc0e9d12e405d4895600

diff --git a/translit.sh b/translit.sh
index 0bf85a0..dce2b08 100755
--- a/translit.sh
+++ b/translit.sh
@@ -8,10 +8,13 @@ s/\f/(newpage)/g
 # utf8 fixes
 s/ﬁ/fi/g
 s/ﬂ/fl/g
+s/ﬀ/ff/g
+s/ﬃ/ffi/g
 s/§/!S/g
 s/©/(C)/g
 s/—/--/g
 s/−/-/g
+s/–/-/g
 s/∗/*/g
 s/ˆ/^/g
 s/〈/=/g
 s/≠/!=/g
 s/Σ/(Sum)/g
-s/√/sqrt:/g
+s/√/(sqrt)/g
 s/π/pi/g
 s/∞/(inf)/g
 s/ƒ/fl./g
@@ -41,19 +44,38 @@ s/•/o/g
 s/⎯/-/g
 s/↑/(uparrow)/g
 s/↓/(downarrow)/g
-s/↔/(<->)/g
-s/→/(->)/g
+s/↔/<->/g
+s/→/->/g
 s/‘/'\''/g
 s/’/'\''/g
+s/“/"/g
+s/”/"/g
+s/∼/~/g
 # pdftotext layout fixes
 s/_ _/__/g
-s/\([0-9]\). \([0-9]\)/\1.\2/g
+# floats are sometimes broken
+s/\([0-9]\)\. \([0-9]\)/\1.\2/g
 ' | LC_ALL=C tr -c '\n-~' '?' | awk '
 BEGIN {
 	getline
 	last=$0
 	side=0
 }
+/^$/ {
+	nl=nl "\n"
+	next
+}
+# TODO: shift page numbers
+#function inc(x) {
+#	if (x ~ /[0-9]/)
+#		return x+1
+#	if (sub(/viii$/,"ix",x) ||
+#	    sub(/iii$/,"iv",x) ||
+#	    sub(/iv$/,"v",x) ||
+#	    sub(/ix$/,"x",x))
+#		return x
+#	return x "i"
+#}
 /^\(newpage\)/ {
 	n=split(last,a)
 	if(side)
@@ -61,7 +83,10 @@ BEGIN {
 	else
 		p=a[n]
 	side=!side
-	print "[page " p "]"
+#	if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
+#		p=0
+#	print "\n[page " inc(p) "]"
+	print "\n[page " p "]"
 	getline
 	getline
 	last=$0
@@ -69,6 +94,6 @@ BEGIN {
 }
 {
 	print last
-	last=$0
-}
-'
+	last=nl $0
+	nl=""
+}'