fix formula for logb in n1570.html
[c-standard] / translit.sh
#!/bin/sh

# Transliterate the output of pdftotext -layout standard.pdf to plain ASCII.
# Reads the text on stdin and writes the result to stdout.
#
# assumes utf8 locale..
#
# The pipeline has three stages:
#   1. sed: turn form feeds into a "(newpage)" marker, spell known non-ASCII
#      characters in ASCII and repair some pdftotext layout artifacts.
#   2. tr:  replace every byte outside the range newline..~ with "?", so
#      anything the sed stage did not handle stays visible in the output.
#      (Bytes below newline, e.g. tab, are replaced as well.)
#   3. awk: turn "(newpage)" markers into "[page N]" lines and drop the
#      lines around each page break.

sed '
s/\f/(newpage)/g
# utf8 fixes
# ligatures: each left-hand side is ONE code point (U+FB00..U+FB04), not the
# ASCII letter sequence, so the order of these rules does not matter
s/ﬁ/fi/g
s/ﬂ/fl/g
s/ﬀ/ff/g
s/ﬃ/ffi/g
s/ﬄ/ffl/g
s/§/!S/g
s/©/(C)/g
s/—/--/g
s/−/-/g
s/–/-/g
s/∗/*/g
s/ˆ/^/g
# angle brackets: U+3008/U+3009 and the canonically equivalent U+2329/U+232A
s/〈/</g
s/〉/>/g
s/〈/</g
s/〉/>/g
# pieces of tall brackets and braces
s/⎡/[^/g
s/⎤/^]/g
s/⎣/[_/g
s/⎦/_]/g
s/⎢/[ /g
s/⎥/ ]/g
s/⎧/{/g
s/⎨/{/g
s/⎩/{/g
s/±/(+-)/g
s/≤/<=/g
s/≥/>=/g
s/≠/!=/g
s/Σ/(Sum)/g
s/√/(sqrt)/g
s/π/pi/g
s/∞/(inf)/g
s/ƒ/fl./g
s/∫/(integral)/g
s/Γ/(Gamma)/g
s/×/x/g
s/•/o/g
s/⎯/-/g
s/↑/(uparrow)/g
s/↓/(downarrow)/g
s/↔/<->/g
s/→/->/g
s/‘/'\''/g
s/’/'\''/g
s/“/"/g
s/”/"/g
s/∼/~/g
# pdftotext layout fixes
s/_ _/__/g
# floats are sometimes broken
s/\([0-9]\)\. \([0-9]\)/\1.\2/g
' | LC_ALL=C tr -c '\n-~' '?' | awk '
# Output is delayed by one line: "last" holds the most recent non-blank line
# that has not been printed yet and "nl" collects the blank lines seen since.
# The delay lets the line right before a page break be dropped instead of
# printed (presumably it is the page footer carrying the page number).
# There is no END rule, so the final pending line is never printed.
# NOTE(review): presumably that line is the footer of the last page - confirm.
BEGIN {
        # prime the one-line delay with the first input line
        getline
        last=$0
        # selects which end of the pending line holds the page number
        side=0
}
/^$/ {
        # blank lines are buffered and re-emitted in front of the next text line
        nl=nl "\n"
        next
}
# TODO: shift page numbers
#function inc(x) {
#       if (x ~ /[0-9]/)
#               return x+1
#       if (sub(/viii$/,"ix",x) ||
#           sub(/iii$/,"iv",x) ||
#           sub(/iv$/,"v",x) ||
#           sub(/ix$/,"x",x))
#               return x
#       return x "i"
#}
/^\(newpage\)/ {
        # take the page number from the pending line: its last field when
        # side is 0, its first field otherwise; side flips on every break
        # (presumably the number alternates between right and left margin)
        n=split(last,a)
        if(side)
                p=a[1]
        else
                p=a[n]
        side=!side
#       if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
#               p=0
#       print "\n[page " inc(p) "]"
        print "\n[page " p "]"
        # the marker line and the line after it are dropped (presumably the
        # running header and a separator); the second line after the marker
        # becomes the new pending line.  nl is not reset here.
        getline
        getline
        last=$0
        next
}
{
        # ordinary text: flush the pending line, then hold this one back
        # together with any blank lines buffered in front of it
        print last
        last=nl $0
        nl=""
}'